diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/X86.cpp b/contrib/llvm-project/clang/lib/Basic/Targets/X86.cpp
index 18e6dbf03e00..072c97e6c8c6 100644
--- a/contrib/llvm-project/clang/lib/Basic/Targets/X86.cpp
+++ b/contrib/llvm-project/clang/lib/Basic/Targets/X86.cpp
@@ -1,1804 +1,1808 @@
//===--- X86.cpp - Implement X86 target feature support -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements X86 TargetInfo objects.
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/TargetParser/X86TargetParser.h"
#include <optional>
namespace clang {
namespace targets {
static constexpr Builtin::Info BuiltinInfoX86[] = {
#define BUILTIN(ID, TYPE, ATTRS) \
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \
{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
#include "clang/Basic/BuiltinsX86.def"
#define BUILTIN(ID, TYPE, ATTRS) \
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \
{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS},
#include "clang/Basic/BuiltinsX86_64.def"
};
static const char *const GCCRegNames[] = {
"ax", "dx", "cx", "bx", "si", "di", "bp", "sp",
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
"argp", "flags", "fpcr", "fpsr", "dirflag", "frame", "xmm0", "xmm1",
"xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "mm0", "mm1",
"mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "r8", "r9",
"r10", "r11", "r12", "r13", "r14", "r15", "xmm8", "xmm9",
"xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "ymm0", "ymm1",
"ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9",
"ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15", "xmm16", "xmm17",
"xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25",
"xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31", "ymm16", "ymm17",
"ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", "ymm24", "ymm25",
"ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31", "zmm0", "zmm1",
"zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9",
"zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17",
"zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25",
"zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "k0", "k1",
"k2", "k3", "k4", "k5", "k6", "k7",
"cr0", "cr2", "cr3", "cr4", "cr8",
"dr0", "dr1", "dr2", "dr3", "dr6", "dr7",
"bnd0", "bnd1", "bnd2", "bnd3",
"tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
};
const TargetInfo::AddlRegName AddlRegNames[] = {
{{"al", "ah", "eax", "rax"}, 0},
{{"bl", "bh", "ebx", "rbx"}, 3},
{{"cl", "ch", "ecx", "rcx"}, 2},
{{"dl", "dh", "edx", "rdx"}, 1},
{{"esi", "rsi"}, 4},
{{"edi", "rdi"}, 5},
{{"esp", "rsp"}, 7},
{{"ebp", "rbp"}, 6},
{{"r8d", "r8w", "r8b"}, 38},
{{"r9d", "r9w", "r9b"}, 39},
{{"r10d", "r10w", "r10b"}, 40},
{{"r11d", "r11w", "r11b"}, 41},
{{"r12d", "r12w", "r12b"}, 42},
{{"r13d", "r13w", "r13b"}, 43},
{{"r14d", "r14w", "r14b"}, 44},
{{"r15d", "r15w", "r15b"}, 45},
{{"r16d", "r16w", "r16b"}, 165},
{{"r17d", "r17w", "r17b"}, 166},
{{"r18d", "r18w", "r18b"}, 167},
{{"r19d", "r19w", "r19b"}, 168},
{{"r20d", "r20w", "r20b"}, 169},
{{"r21d", "r21w", "r21b"}, 170},
{{"r22d", "r22w", "r22b"}, 171},
{{"r23d", "r23w", "r23b"}, 172},
{{"r24d", "r24w", "r24b"}, 173},
{{"r25d", "r25w", "r25b"}, 174},
{{"r26d", "r26w", "r26b"}, 175},
{{"r27d", "r27w", "r27b"}, 176},
{{"r28d", "r28w", "r28b"}, 177},
{{"r29d", "r29w", "r29b"}, 178},
{{"r30d", "r30w", "r30b"}, 179},
{{"r31d", "r31w", "r31b"}, 180},
};
} // namespace targets
} // namespace clang
using namespace clang;
using namespace clang::targets;
bool X86TargetInfo::setFPMath(StringRef Name) {
if (Name == "387") {
FPMath = FP_387;
return true;
}
if (Name == "sse") {
FPMath = FP_SSE;
return true;
}
return false;
}
bool X86TargetInfo::initFeatureMap(
llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
const std::vector<std::string> &FeaturesVec) const {
// FIXME: This *really* should not be here.
// X86_64 always has SSE2.
if (getTriple().getArch() == llvm::Triple::x86_64)
setFeatureEnabled(Features, "sse2", true);
using namespace llvm::X86;
SmallVector<StringRef, 16> CPUFeatures;
getFeaturesForCPU(CPU, CPUFeatures);
for (auto &F : CPUFeatures)
setFeatureEnabled(Features, F, true);
std::vector<std::string> UpdatedFeaturesVec;
std::vector<std::string> UpdatedAVX10FeaturesVec;
enum { FE_NOSET = -1, FE_FALSE, FE_TRUE };
int HasEVEX512 = FE_NOSET;
bool HasAVX512F = Features.lookup("avx512f");
bool HasAVX10 = Features.lookup("avx10.1-256");
bool HasAVX10_512 = Features.lookup("avx10.1-512");
std::string LastAVX10;
std::string LastAVX512;
for (const auto &Feature : FeaturesVec) {
// Expand general-regs-only to -x87, -mmx and -sse
if (Feature == "+general-regs-only") {
UpdatedFeaturesVec.push_back("-x87");
UpdatedFeaturesVec.push_back("-mmx");
UpdatedFeaturesVec.push_back("-sse");
continue;
}
if (Feature.substr(1, 6) == "avx10.") {
if (Feature[0] == '+') {
HasAVX10 = true;
if (StringRef(Feature).ends_with("512"))
HasAVX10_512 = true;
LastAVX10 = Feature;
} else if (HasAVX10 && Feature == "-avx10.1-256") {
HasAVX10 = false;
HasAVX10_512 = false;
} else if (HasAVX10_512 && Feature == "-avx10.1-512") {
HasAVX10_512 = false;
}
// Postpone AVX10 features handling after AVX512 settled.
UpdatedAVX10FeaturesVec.push_back(Feature);
continue;
} else if (!HasAVX512F && StringRef(Feature).starts_with("+avx512")) {
HasAVX512F = true;
LastAVX512 = Feature;
} else if (HasAVX512F && Feature == "-avx512f") {
HasAVX512F = false;
} else if (HasEVEX512 != FE_TRUE && Feature == "+evex512") {
HasEVEX512 = FE_TRUE;
continue;
} else if (HasEVEX512 != FE_FALSE && Feature == "-evex512") {
HasEVEX512 = FE_FALSE;
continue;
}
UpdatedFeaturesVec.push_back(Feature);
}
llvm::append_range(UpdatedFeaturesVec, UpdatedAVX10FeaturesVec);
// HasEVEX512 is a three-state flag. We need to turn it into [+-]evex512
// according to the other features.
if (HasAVX512F) {
UpdatedFeaturesVec.push_back(HasEVEX512 == FE_FALSE ? "-evex512"
: "+evex512");
if (HasAVX10 && !HasAVX10_512 && HasEVEX512 != FE_FALSE)
Diags.Report(diag::warn_invalid_feature_combination)
<< LastAVX512 + " " + LastAVX10 + "; will be promoted to avx10.1-512";
} else if (HasAVX10) {
if (HasEVEX512 != FE_NOSET)
Diags.Report(diag::warn_invalid_feature_combination)
<< LastAVX10 + (HasEVEX512 == FE_TRUE ? " +evex512" : " -evex512");
UpdatedFeaturesVec.push_back(HasAVX10_512 ? "+evex512" : "-evex512");
}
if (!TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec))
return false;
// Can't do this earlier because we need to be able to explicitly enable
// or disable these features and the things that they depend upon.
// Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled.
auto I = Features.find("sse4.2");
if (I != Features.end() && I->getValue() &&
!llvm::is_contained(UpdatedFeaturesVec, "-popcnt"))
Features["popcnt"] = true;
// Additionally, if SSE is enabled and mmx is not explicitly disabled,
// then enable MMX.
I = Features.find("sse");
if (I != Features.end() && I->getValue() &&
!llvm::is_contained(UpdatedFeaturesVec, "-mmx"))
Features["mmx"] = true;
// Enable xsave if avx is enabled and xsave is not explicitly disabled.
I = Features.find("avx");
if (I != Features.end() && I->getValue() &&
!llvm::is_contained(UpdatedFeaturesVec, "-xsave"))
Features["xsave"] = true;
// Enable CRC32 if SSE4.2 is enabled and CRC32 is not explicitly disabled.
I = Features.find("sse4.2");
if (I != Features.end() && I->getValue() &&
!llvm::is_contained(UpdatedFeaturesVec, "-crc32"))
Features["crc32"] = true;
return true;
}
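// Editor's illustration (not part of this change): the late implication
// rules above, reduced to a standalone sketch. The helper name and the
// implication pairs shown are assumptions for demonstration only.
#if 0
#include <cassert>
#include <map>
#include <string>
#include <vector>

// Enable Dep whenever Base is on and "-Dep" was not explicitly requested,
// mirroring the sse4.2 -> popcnt/crc32 handling above.
static void implyFeature(std::map<std::string, bool> &Features,
                         const std::vector<std::string> &Explicit,
                         const std::string &Base, const std::string &Dep) {
  bool Suppressed = false;
  for (const auto &F : Explicit)
    if (F == "-" + Dep)
      Suppressed = true;
  auto I = Features.find(Base);
  if (I != Features.end() && I->second && !Suppressed)
    Features[Dep] = true;
}

int main() {
  std::map<std::string, bool> Features{{"sse4.2", true}};
  implyFeature(Features, {"+sse4.2"}, "sse4.2", "popcnt");
  assert(Features["popcnt"]); // implied by sse4.2
  implyFeature(Features, {"-crc32"}, "sse4.2", "crc32");
  assert(!Features.count("crc32")); // explicit "-crc32" wins
}
#endif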
void X86TargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
StringRef Name, bool Enabled) const {
if (Name == "sse4") {
// We can get here via the __target__ attribute since that's not controlled
// via the -msse4/-mno-sse4 command line alias. Handle it the same way
// here: turn on the sse4.2 level if enabled, turn off the sse4.1 level if
// disabled.
if (Enabled)
Name = "sse4.2";
else
Name = "sse4.1";
}
Features[Name] = Enabled;
llvm::X86::updateImpliedFeatures(Name, Enabled, Features);
}
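// Editor's illustration (not part of this change): the "sse4" aliasing
// above as a free function; the helper name is hypothetical.
#if 0
#include <string>

static std::string canonicalizeSSE4(std::string Name, bool Enabled) {
  // "+sse4" means the full sse4.2 level; "-sse4" means dropping the
  // sse4.1 level (which transitively disables sse4.2 as well).
  if (Name == "sse4")
    Name = Enabled ? "sse4.2" : "sse4.1";
  return Name;
}

int main() {
  return canonicalizeSSE4("sse4", true) == "sse4.2" &&
                 canonicalizeSSE4("sse4", false) == "sse4.1"
             ? 0
             : 1;
}
#endif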
/// handleTargetFeatures - Perform initialization based on the user
/// configured set of features.
bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) {
for (const auto &Feature : Features) {
if (Feature[0] != '+')
continue;
if (Feature == "+mmx") {
HasMMX = true;
} else if (Feature == "+aes") {
HasAES = true;
} else if (Feature == "+vaes") {
HasVAES = true;
} else if (Feature == "+pclmul") {
HasPCLMUL = true;
} else if (Feature == "+vpclmulqdq") {
HasVPCLMULQDQ = true;
} else if (Feature == "+lzcnt") {
HasLZCNT = true;
} else if (Feature == "+rdrnd") {
HasRDRND = true;
} else if (Feature == "+fsgsbase") {
HasFSGSBASE = true;
} else if (Feature == "+bmi") {
HasBMI = true;
} else if (Feature == "+bmi2") {
HasBMI2 = true;
} else if (Feature == "+popcnt") {
HasPOPCNT = true;
} else if (Feature == "+rtm") {
HasRTM = true;
} else if (Feature == "+prfchw") {
HasPRFCHW = true;
} else if (Feature == "+rdseed") {
HasRDSEED = true;
} else if (Feature == "+adx") {
HasADX = true;
} else if (Feature == "+tbm") {
HasTBM = true;
} else if (Feature == "+lwp") {
HasLWP = true;
} else if (Feature == "+fma") {
HasFMA = true;
} else if (Feature == "+f16c") {
HasF16C = true;
} else if (Feature == "+gfni") {
HasGFNI = true;
} else if (Feature == "+evex512") {
HasEVEX512 = true;
} else if (Feature == "+avx10.1-256") {
HasAVX10_1 = true;
} else if (Feature == "+avx10.1-512") {
HasAVX10_1_512 = true;
} else if (Feature == "+avx512cd") {
HasAVX512CD = true;
} else if (Feature == "+avx512vpopcntdq") {
HasAVX512VPOPCNTDQ = true;
} else if (Feature == "+avx512vnni") {
HasAVX512VNNI = true;
} else if (Feature == "+avx512bf16") {
HasAVX512BF16 = true;
} else if (Feature == "+avx512fp16") {
HasAVX512FP16 = true;
HasLegalHalfType = true;
} else if (Feature == "+avx512dq") {
HasAVX512DQ = true;
} else if (Feature == "+avx512bitalg") {
HasAVX512BITALG = true;
} else if (Feature == "+avx512bw") {
HasAVX512BW = true;
} else if (Feature == "+avx512vl") {
HasAVX512VL = true;
} else if (Feature == "+avx512vbmi") {
HasAVX512VBMI = true;
} else if (Feature == "+avx512vbmi2") {
HasAVX512VBMI2 = true;
} else if (Feature == "+avx512ifma") {
HasAVX512IFMA = true;
} else if (Feature == "+avx512vp2intersect") {
HasAVX512VP2INTERSECT = true;
} else if (Feature == "+sha") {
HasSHA = true;
} else if (Feature == "+sha512") {
HasSHA512 = true;
} else if (Feature == "+shstk") {
HasSHSTK = true;
} else if (Feature == "+sm3") {
HasSM3 = true;
} else if (Feature == "+sm4") {
HasSM4 = true;
} else if (Feature == "+movbe") {
HasMOVBE = true;
} else if (Feature == "+sgx") {
HasSGX = true;
} else if (Feature == "+cx8") {
HasCX8 = true;
} else if (Feature == "+cx16") {
HasCX16 = true;
} else if (Feature == "+fxsr") {
HasFXSR = true;
} else if (Feature == "+xsave") {
HasXSAVE = true;
} else if (Feature == "+xsaveopt") {
HasXSAVEOPT = true;
} else if (Feature == "+xsavec") {
HasXSAVEC = true;
} else if (Feature == "+xsaves") {
HasXSAVES = true;
} else if (Feature == "+mwaitx") {
HasMWAITX = true;
} else if (Feature == "+pku") {
HasPKU = true;
} else if (Feature == "+clflushopt") {
HasCLFLUSHOPT = true;
} else if (Feature == "+clwb") {
HasCLWB = true;
} else if (Feature == "+wbnoinvd") {
HasWBNOINVD = true;
} else if (Feature == "+prefetchi") {
HasPREFETCHI = true;
} else if (Feature == "+clzero") {
HasCLZERO = true;
} else if (Feature == "+cldemote") {
HasCLDEMOTE = true;
} else if (Feature == "+rdpid") {
HasRDPID = true;
} else if (Feature == "+rdpru") {
HasRDPRU = true;
} else if (Feature == "+kl") {
HasKL = true;
} else if (Feature == "+widekl") {
HasWIDEKL = true;
} else if (Feature == "+retpoline-external-thunk") {
HasRetpolineExternalThunk = true;
} else if (Feature == "+sahf") {
HasLAHFSAHF = true;
} else if (Feature == "+waitpkg") {
HasWAITPKG = true;
} else if (Feature == "+movdiri") {
HasMOVDIRI = true;
} else if (Feature == "+movdir64b") {
HasMOVDIR64B = true;
} else if (Feature == "+pconfig") {
HasPCONFIG = true;
} else if (Feature == "+ptwrite") {
HasPTWRITE = true;
} else if (Feature == "+invpcid") {
HasINVPCID = true;
} else if (Feature == "+enqcmd") {
HasENQCMD = true;
} else if (Feature == "+hreset") {
HasHRESET = true;
} else if (Feature == "+amx-bf16") {
HasAMXBF16 = true;
} else if (Feature == "+amx-fp16") {
HasAMXFP16 = true;
} else if (Feature == "+amx-int8") {
HasAMXINT8 = true;
} else if (Feature == "+amx-tile") {
HasAMXTILE = true;
} else if (Feature == "+amx-complex") {
HasAMXCOMPLEX = true;
} else if (Feature == "+cmpccxadd") {
HasCMPCCXADD = true;
} else if (Feature == "+raoint") {
HasRAOINT = true;
} else if (Feature == "+avxifma") {
HasAVXIFMA = true;
} else if (Feature == "+avxneconvert") {
HasAVXNECONVERT = true;
} else if (Feature == "+avxvnni") {
HasAVXVNNI = true;
} else if (Feature == "+avxvnniint16") {
HasAVXVNNIINT16 = true;
} else if (Feature == "+avxvnniint8") {
HasAVXVNNIINT8 = true;
} else if (Feature == "+serialize") {
HasSERIALIZE = true;
} else if (Feature == "+tsxldtrk") {
HasTSXLDTRK = true;
} else if (Feature == "+uintr") {
HasUINTR = true;
} else if (Feature == "+usermsr") {
HasUSERMSR = true;
} else if (Feature == "+crc32") {
HasCRC32 = true;
} else if (Feature == "+x87") {
HasX87 = true;
} else if (Feature == "+fullbf16") {
HasFullBFloat16 = true;
} else if (Feature == "+egpr") {
HasEGPR = true;
} else if (Feature == "+inline-asm-use-gpr32") {
HasInlineAsmUseGPR32 = true;
} else if (Feature == "+push2pop2") {
HasPush2Pop2 = true;
} else if (Feature == "+ppx") {
HasPPX = true;
} else if (Feature == "+ndd") {
HasNDD = true;
} else if (Feature == "+ccmp") {
HasCCMP = true;
} else if (Feature == "+nf") {
HasNF = true;
} else if (Feature == "+cf") {
HasCF = true;
} else if (Feature == "+zu") {
HasZU = true;
} else if (Feature == "+branch-hint") {
HasBranchHint = true;
}
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
.Case("+avx512f", AVX512F)
.Case("+avx2", AVX2)
.Case("+avx", AVX)
.Case("+sse4.2", SSE42)
.Case("+sse4.1", SSE41)
.Case("+ssse3", SSSE3)
.Case("+sse3", SSE3)
.Case("+sse2", SSE2)
.Case("+sse", SSE1)
.Default(NoSSE);
SSELevel = std::max(SSELevel, Level);
HasFloat16 = SSELevel >= SSE2;
// X86 target has bfloat16 emulation support in the backend, where
// bfloat16 is treated as a 32-bit float, arithmetic operations are
// performed in 32-bit, and the result is converted back to bfloat16.
// Truncation and extension between bfloat16 and 32-bit float are supported
// by the compiler-rt library. However, native bfloat16 support is currently
// not available in the X86 target. Hence, HasFullBFloat16 will be false
// until native bfloat16 support is available. HasFullBFloat16 is used to
// determine whether to automatically use excess floating point precision
// for bfloat16 arithmetic operations in the front-end.
HasBFloat16 = SSELevel >= SSE2;
XOPEnum XLevel = llvm::StringSwitch<XOPEnum>(Feature)
.Case("+xop", XOP)
.Case("+fma4", FMA4)
.Case("+sse4a", SSE4A)
.Default(NoXOP);
XOPLevel = std::max(XOPLevel, XLevel);
}
// LLVM doesn't have a separate switch for fpmath, so only accept it if it
// matches the selected sse level.
if ((FPMath == FP_SSE && SSELevel < SSE1) ||
(FPMath == FP_387 && SSELevel >= SSE1)) {
Diags.Report(diag::err_target_unsupported_fpmath)
<< (FPMath == FP_SSE ? "sse" : "387");
return false;
}
// FIXME: We should allow long double type on 32-bits to match with GCC.
// This requires backend to be able to lower f80 without x87 first.
if (!HasX87 && LongDoubleFormat == &llvm::APFloat::x87DoubleExtended())
HasLongDouble = false;
return true;
}
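// Editor's illustration (not part of this change): the SSE handling above
// depends only on the enumerators being ordered from weakest to strongest,
// so std::max() keeps the strongest level seen regardless of flag order.
// A reduced sketch of that lattice pattern, with stand-in enumerator names:
#if 0
#include <algorithm>
#include <cassert>
#include <initializer_list>

enum Level { None, S1, S2, S3, SS3, S41, S42, V, V2, V512 };

int main() {
  Level L = None;
  for (Level Seen : {S2, V, S41}) // order of the flags does not matter
    L = std::max(L, Seen);
  assert(L == V);
}
#endif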
/// X86TargetInfo::getTargetDefines - Return the set of the X86-specific macro
/// definitions for this particular subtarget.
void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
// Inline assembly supports X86 flag outputs.
Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__");
std::string CodeModel = getTargetOpts().CodeModel;
if (CodeModel == "default")
CodeModel = "small";
Builder.defineMacro("__code_model_" + CodeModel + "__");
// Target identification.
if (getTriple().getArch() == llvm::Triple::x86_64) {
Builder.defineMacro("__amd64__");
Builder.defineMacro("__amd64");
Builder.defineMacro("__x86_64");
Builder.defineMacro("__x86_64__");
if (getTriple().getArchName() == "x86_64h") {
Builder.defineMacro("__x86_64h");
Builder.defineMacro("__x86_64h__");
}
} else {
DefineStd(Builder, "i386", Opts);
}
Builder.defineMacro("__SEG_GS");
Builder.defineMacro("__SEG_FS");
Builder.defineMacro("__seg_gs", "__attribute__((address_space(256)))");
Builder.defineMacro("__seg_fs", "__attribute__((address_space(257)))");
// Subtarget options.
// FIXME: We are hard-coding the tune parameters based on the CPU, but they
// truly should be based on -mtune options.
using namespace llvm::X86;
switch (CPU) {
case CK_None:
break;
case CK_i386:
// The rest come from the i386 define above.
Builder.defineMacro("__tune_i386__");
break;
case CK_i486:
case CK_WinChipC6:
case CK_WinChip2:
case CK_C3:
defineCPUMacros(Builder, "i486");
break;
case CK_PentiumMMX:
Builder.defineMacro("__pentium_mmx__");
Builder.defineMacro("__tune_pentium_mmx__");
[[fallthrough]];
case CK_i586:
case CK_Pentium:
defineCPUMacros(Builder, "i586");
defineCPUMacros(Builder, "pentium");
break;
case CK_Pentium3:
case CK_PentiumM:
Builder.defineMacro("__tune_pentium3__");
[[fallthrough]];
case CK_Pentium2:
case CK_C3_2:
Builder.defineMacro("__tune_pentium2__");
[[fallthrough]];
case CK_PentiumPro:
case CK_i686:
defineCPUMacros(Builder, "i686");
defineCPUMacros(Builder, "pentiumpro");
break;
case CK_Pentium4:
defineCPUMacros(Builder, "pentium4");
break;
case CK_Yonah:
case CK_Prescott:
case CK_Nocona:
defineCPUMacros(Builder, "nocona");
break;
case CK_Core2:
case CK_Penryn:
defineCPUMacros(Builder, "core2");
break;
case CK_Bonnell:
defineCPUMacros(Builder, "atom");
break;
case CK_Silvermont:
defineCPUMacros(Builder, "slm");
break;
case CK_Goldmont:
defineCPUMacros(Builder, "goldmont");
break;
case CK_GoldmontPlus:
defineCPUMacros(Builder, "goldmont_plus");
break;
case CK_Tremont:
defineCPUMacros(Builder, "tremont");
break;
// Gracemont and later Atom cores use the P-core CPU macros.
case CK_Gracemont:
case CK_Nehalem:
case CK_Westmere:
case CK_SandyBridge:
case CK_IvyBridge:
case CK_Haswell:
case CK_Broadwell:
case CK_SkylakeClient:
case CK_SkylakeServer:
case CK_Cascadelake:
case CK_Cooperlake:
case CK_Cannonlake:
case CK_IcelakeClient:
case CK_Rocketlake:
case CK_IcelakeServer:
case CK_Tigerlake:
case CK_SapphireRapids:
case CK_Alderlake:
case CK_Raptorlake:
case CK_Meteorlake:
case CK_Arrowlake:
case CK_ArrowlakeS:
case CK_Lunarlake:
case CK_Pantherlake:
case CK_Sierraforest:
case CK_Grandridge:
case CK_Graniterapids:
case CK_GraniterapidsD:
case CK_Emeraldrapids:
case CK_Clearwaterforest:
// FIXME: Historically, we defined this legacy name; it would be nice to
// remove it at some point. We've never exposed fine-grained names for
// recent primary x86 CPUs, and we should keep it that way.
defineCPUMacros(Builder, "corei7");
break;
case CK_KNL:
defineCPUMacros(Builder, "knl");
break;
case CK_KNM:
break;
case CK_Lakemont:
defineCPUMacros(Builder, "i586", /*Tuning*/false);
defineCPUMacros(Builder, "pentium", /*Tuning*/false);
Builder.defineMacro("__tune_lakemont__");
break;
case CK_K6_2:
Builder.defineMacro("__k6_2__");
Builder.defineMacro("__tune_k6_2__");
[[fallthrough]];
case CK_K6_3:
if (CPU != CK_K6_2) { // In case of fallthrough
// FIXME: GCC may be enabling these in cases where some other k6
// architecture is specified but -m3dnow is explicitly provided. The
// exact semantics need to be determined and emulated here.
Builder.defineMacro("__k6_3__");
Builder.defineMacro("__tune_k6_3__");
}
[[fallthrough]];
case CK_K6:
defineCPUMacros(Builder, "k6");
break;
case CK_Athlon:
case CK_AthlonXP:
defineCPUMacros(Builder, "athlon");
if (SSELevel != NoSSE) {
Builder.defineMacro("__athlon_sse__");
Builder.defineMacro("__tune_athlon_sse__");
}
break;
case CK_K8:
case CK_K8SSE3:
case CK_x86_64:
defineCPUMacros(Builder, "k8");
break;
case CK_x86_64_v2:
case CK_x86_64_v3:
case CK_x86_64_v4:
break;
case CK_AMDFAM10:
defineCPUMacros(Builder, "amdfam10");
break;
case CK_BTVER1:
defineCPUMacros(Builder, "btver1");
break;
case CK_BTVER2:
defineCPUMacros(Builder, "btver2");
break;
case CK_BDVER1:
defineCPUMacros(Builder, "bdver1");
break;
case CK_BDVER2:
defineCPUMacros(Builder, "bdver2");
break;
case CK_BDVER3:
defineCPUMacros(Builder, "bdver3");
break;
case CK_BDVER4:
defineCPUMacros(Builder, "bdver4");
break;
case CK_ZNVER1:
defineCPUMacros(Builder, "znver1");
break;
case CK_ZNVER2:
defineCPUMacros(Builder, "znver2");
break;
case CK_ZNVER3:
defineCPUMacros(Builder, "znver3");
break;
case CK_ZNVER4:
defineCPUMacros(Builder, "znver4");
break;
+ case CK_ZNVER5:
+ defineCPUMacros(Builder, "znver5");
+ break;
case CK_Geode:
defineCPUMacros(Builder, "geode");
break;
}
// Target properties.
Builder.defineMacro("__REGISTER_PREFIX__", "");
// Define __NO_MATH_INLINES on linux/x86 so that we don't get inline
// functions in glibc header files that use FP Stack inline asm which the
// backend can't deal with (PR879).
Builder.defineMacro("__NO_MATH_INLINES");
if (HasAES)
Builder.defineMacro("__AES__");
if (HasVAES)
Builder.defineMacro("__VAES__");
if (HasPCLMUL)
Builder.defineMacro("__PCLMUL__");
if (HasVPCLMULQDQ)
Builder.defineMacro("__VPCLMULQDQ__");
// Note, in 32-bit mode, GCC does not define the macro if -mno-sahf. In LLVM,
// the feature flag only applies to 64-bit mode.
if (HasLAHFSAHF || getTriple().getArch() == llvm::Triple::x86)
Builder.defineMacro("__LAHF_SAHF__");
if (HasLZCNT)
Builder.defineMacro("__LZCNT__");
if (HasRDRND)
Builder.defineMacro("__RDRND__");
if (HasFSGSBASE)
Builder.defineMacro("__FSGSBASE__");
if (HasBMI)
Builder.defineMacro("__BMI__");
if (HasBMI2)
Builder.defineMacro("__BMI2__");
if (HasPOPCNT)
Builder.defineMacro("__POPCNT__");
if (HasRTM)
Builder.defineMacro("__RTM__");
if (HasPRFCHW)
Builder.defineMacro("__PRFCHW__");
if (HasRDSEED)
Builder.defineMacro("__RDSEED__");
if (HasADX)
Builder.defineMacro("__ADX__");
if (HasTBM)
Builder.defineMacro("__TBM__");
if (HasLWP)
Builder.defineMacro("__LWP__");
if (HasMWAITX)
Builder.defineMacro("__MWAITX__");
if (HasMOVBE)
Builder.defineMacro("__MOVBE__");
switch (XOPLevel) {
case XOP:
Builder.defineMacro("__XOP__");
[[fallthrough]];
case FMA4:
Builder.defineMacro("__FMA4__");
[[fallthrough]];
case SSE4A:
Builder.defineMacro("__SSE4A__");
[[fallthrough]];
case NoXOP:
break;
}
if (HasFMA)
Builder.defineMacro("__FMA__");
if (HasF16C)
Builder.defineMacro("__F16C__");
if (HasGFNI)
Builder.defineMacro("__GFNI__");
if (HasEVEX512)
Builder.defineMacro("__EVEX512__");
if (HasAVX10_1)
Builder.defineMacro("__AVX10_1__");
if (HasAVX10_1_512)
Builder.defineMacro("__AVX10_1_512__");
if (HasAVX512CD)
Builder.defineMacro("__AVX512CD__");
if (HasAVX512VPOPCNTDQ)
Builder.defineMacro("__AVX512VPOPCNTDQ__");
if (HasAVX512VNNI)
Builder.defineMacro("__AVX512VNNI__");
if (HasAVX512BF16)
Builder.defineMacro("__AVX512BF16__");
if (HasAVX512FP16)
Builder.defineMacro("__AVX512FP16__");
if (HasAVX512DQ)
Builder.defineMacro("__AVX512DQ__");
if (HasAVX512BITALG)
Builder.defineMacro("__AVX512BITALG__");
if (HasAVX512BW)
Builder.defineMacro("__AVX512BW__");
if (HasAVX512VL) {
Builder.defineMacro("__AVX512VL__");
Builder.defineMacro("__EVEX256__");
}
if (HasAVX512VBMI)
Builder.defineMacro("__AVX512VBMI__");
if (HasAVX512VBMI2)
Builder.defineMacro("__AVX512VBMI2__");
if (HasAVX512IFMA)
Builder.defineMacro("__AVX512IFMA__");
if (HasAVX512VP2INTERSECT)
Builder.defineMacro("__AVX512VP2INTERSECT__");
if (HasSHA)
Builder.defineMacro("__SHA__");
if (HasSHA512)
Builder.defineMacro("__SHA512__");
if (HasFXSR)
Builder.defineMacro("__FXSR__");
if (HasXSAVE)
Builder.defineMacro("__XSAVE__");
if (HasXSAVEOPT)
Builder.defineMacro("__XSAVEOPT__");
if (HasXSAVEC)
Builder.defineMacro("__XSAVEC__");
if (HasXSAVES)
Builder.defineMacro("__XSAVES__");
if (HasPKU)
Builder.defineMacro("__PKU__");
if (HasCLFLUSHOPT)
Builder.defineMacro("__CLFLUSHOPT__");
if (HasCLWB)
Builder.defineMacro("__CLWB__");
if (HasWBNOINVD)
Builder.defineMacro("__WBNOINVD__");
if (HasSHSTK)
Builder.defineMacro("__SHSTK__");
if (HasSGX)
Builder.defineMacro("__SGX__");
if (HasSM3)
Builder.defineMacro("__SM3__");
if (HasSM4)
Builder.defineMacro("__SM4__");
if (HasPREFETCHI)
Builder.defineMacro("__PREFETCHI__");
if (HasCLZERO)
Builder.defineMacro("__CLZERO__");
if (HasKL)
Builder.defineMacro("__KL__");
if (HasWIDEKL)
Builder.defineMacro("__WIDEKL__");
if (HasRDPID)
Builder.defineMacro("__RDPID__");
if (HasRDPRU)
Builder.defineMacro("__RDPRU__");
if (HasCLDEMOTE)
Builder.defineMacro("__CLDEMOTE__");
if (HasWAITPKG)
Builder.defineMacro("__WAITPKG__");
if (HasMOVDIRI)
Builder.defineMacro("__MOVDIRI__");
if (HasMOVDIR64B)
Builder.defineMacro("__MOVDIR64B__");
if (HasPCONFIG)
Builder.defineMacro("__PCONFIG__");
if (HasPTWRITE)
Builder.defineMacro("__PTWRITE__");
if (HasINVPCID)
Builder.defineMacro("__INVPCID__");
if (HasENQCMD)
Builder.defineMacro("__ENQCMD__");
if (HasHRESET)
Builder.defineMacro("__HRESET__");
if (HasAMXTILE)
Builder.defineMacro("__AMX_TILE__");
if (HasAMXINT8)
Builder.defineMacro("__AMX_INT8__");
if (HasAMXBF16)
Builder.defineMacro("__AMX_BF16__");
if (HasAMXFP16)
Builder.defineMacro("__AMX_FP16__");
if (HasAMXCOMPLEX)
Builder.defineMacro("__AMX_COMPLEX__");
if (HasCMPCCXADD)
Builder.defineMacro("__CMPCCXADD__");
if (HasRAOINT)
Builder.defineMacro("__RAOINT__");
if (HasAVXIFMA)
Builder.defineMacro("__AVXIFMA__");
if (HasAVXNECONVERT)
Builder.defineMacro("__AVXNECONVERT__");
if (HasAVXVNNI)
Builder.defineMacro("__AVXVNNI__");
if (HasAVXVNNIINT16)
Builder.defineMacro("__AVXVNNIINT16__");
if (HasAVXVNNIINT8)
Builder.defineMacro("__AVXVNNIINT8__");
if (HasSERIALIZE)
Builder.defineMacro("__SERIALIZE__");
if (HasTSXLDTRK)
Builder.defineMacro("__TSXLDTRK__");
if (HasUINTR)
Builder.defineMacro("__UINTR__");
if (HasUSERMSR)
Builder.defineMacro("__USERMSR__");
if (HasCRC32)
Builder.defineMacro("__CRC32__");
if (HasEGPR)
Builder.defineMacro("__EGPR__");
if (HasPush2Pop2)
Builder.defineMacro("__PUSH2POP2__");
if (HasPPX)
Builder.defineMacro("__PPX__");
if (HasNDD)
Builder.defineMacro("__NDD__");
if (HasCCMP)
Builder.defineMacro("__CCMP__");
if (HasNF)
Builder.defineMacro("__NF__");
if (HasCF)
Builder.defineMacro("__CF__");
if (HasZU)
Builder.defineMacro("__ZU__");
if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD && HasCCMP && HasNF &&
HasCF && HasZU)
Builder.defineMacro("__APX_F__");
if (HasEGPR && HasInlineAsmUseGPR32)
Builder.defineMacro("__APX_INLINE_ASM_USE_GPR32__");
// Each case falls through to the previous one here.
switch (SSELevel) {
case AVX512F:
Builder.defineMacro("__AVX512F__");
[[fallthrough]];
case AVX2:
Builder.defineMacro("__AVX2__");
[[fallthrough]];
case AVX:
Builder.defineMacro("__AVX__");
[[fallthrough]];
case SSE42:
Builder.defineMacro("__SSE4_2__");
[[fallthrough]];
case SSE41:
Builder.defineMacro("__SSE4_1__");
[[fallthrough]];
case SSSE3:
Builder.defineMacro("__SSSE3__");
[[fallthrough]];
case SSE3:
Builder.defineMacro("__SSE3__");
[[fallthrough]];
case SSE2:
Builder.defineMacro("__SSE2__");
Builder.defineMacro("__SSE2_MATH__"); // -mfp-math=sse always implied.
[[fallthrough]];
case SSE1:
Builder.defineMacro("__SSE__");
Builder.defineMacro("__SSE_MATH__"); // -mfp-math=sse always implied.
[[fallthrough]];
case NoSSE:
break;
}
if (Opts.MicrosoftExt && getTriple().getArch() == llvm::Triple::x86) {
switch (SSELevel) {
case AVX512F:
case AVX2:
case AVX:
case SSE42:
case SSE41:
case SSSE3:
case SSE3:
case SSE2:
Builder.defineMacro("_M_IX86_FP", Twine(2));
break;
case SSE1:
Builder.defineMacro("_M_IX86_FP", Twine(1));
break;
default:
Builder.defineMacro("_M_IX86_FP", Twine(0));
break;
}
}
if (HasMMX) {
Builder.defineMacro("__MMX__");
}
if (CPU >= CK_i486 || CPU == CK_None) {
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
}
if (HasCX8)
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
if (HasCX16 && getTriple().getArch() == llvm::Triple::x86_64)
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
if (HasFloat128)
Builder.defineMacro("__SIZEOF_FLOAT128__", "16");
}
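// Editor's illustration (not part of this change): user code typically
// consumes the macros defined above as coarse feature guards. A minimal,
// hedged example:
#if 0
#include <cstdio>

int main() {
#if defined(__AVX2__)
  std::puts("compiled with AVX2 enabled"); // e.g. -mavx2 or -march=haswell
#elif defined(__SSE2__)
  std::puts("compiled with SSE2 enabled"); // the x86_64 baseline
#else
  std::puts("no SSE-level macros defined");
#endif
}
#endif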
bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
return llvm::StringSwitch<bool>(Name)
.Case("adx", true)
.Case("aes", true)
.Case("amx-bf16", true)
.Case("amx-complex", true)
.Case("amx-fp16", true)
.Case("amx-int8", true)
.Case("amx-tile", true)
.Case("avx", true)
.Case("avx10.1-256", true)
.Case("avx10.1-512", true)
.Case("avx2", true)
.Case("avx512f", true)
.Case("avx512cd", true)
.Case("avx512vpopcntdq", true)
.Case("avx512vnni", true)
.Case("avx512bf16", true)
.Case("avx512fp16", true)
.Case("avx512dq", true)
.Case("avx512bitalg", true)
.Case("avx512bw", true)
.Case("avx512vl", true)
.Case("avx512vbmi", true)
.Case("avx512vbmi2", true)
.Case("avx512ifma", true)
.Case("avx512vp2intersect", true)
.Case("avxifma", true)
.Case("avxneconvert", true)
.Case("avxvnni", true)
.Case("avxvnniint16", true)
.Case("avxvnniint8", true)
.Case("bmi", true)
.Case("bmi2", true)
.Case("cldemote", true)
.Case("clflushopt", true)
.Case("clwb", true)
.Case("clzero", true)
.Case("cmpccxadd", true)
.Case("crc32", true)
.Case("cx16", true)
.Case("enqcmd", true)
.Case("evex512", true)
.Case("f16c", true)
.Case("fma", true)
.Case("fma4", true)
.Case("fsgsbase", true)
.Case("fxsr", true)
.Case("general-regs-only", true)
.Case("gfni", true)
.Case("hreset", true)
.Case("invpcid", true)
.Case("kl", true)
.Case("widekl", true)
.Case("lwp", true)
.Case("lzcnt", true)
.Case("mmx", true)
.Case("movbe", true)
.Case("movdiri", true)
.Case("movdir64b", true)
.Case("mwaitx", true)
.Case("pclmul", true)
.Case("pconfig", true)
.Case("pku", true)
.Case("popcnt", true)
.Case("prefetchi", true)
.Case("prfchw", true)
.Case("ptwrite", true)
.Case("raoint", true)
.Case("rdpid", true)
.Case("rdpru", true)
.Case("rdrnd", true)
.Case("rdseed", true)
.Case("rtm", true)
.Case("sahf", true)
.Case("serialize", true)
.Case("sgx", true)
.Case("sha", true)
.Case("sha512", true)
.Case("shstk", true)
.Case("sm3", true)
.Case("sm4", true)
.Case("sse", true)
.Case("sse2", true)
.Case("sse3", true)
.Case("ssse3", true)
.Case("sse4", true)
.Case("sse4.1", true)
.Case("sse4.2", true)
.Case("sse4a", true)
.Case("tbm", true)
.Case("tsxldtrk", true)
.Case("uintr", true)
.Case("usermsr", true)
.Case("vaes", true)
.Case("vpclmulqdq", true)
.Case("wbnoinvd", true)
.Case("waitpkg", true)
.Case("x87", true)
.Case("xop", true)
.Case("xsave", true)
.Case("xsavec", true)
.Case("xsaves", true)
.Case("xsaveopt", true)
.Case("egpr", true)
.Case("push2pop2", true)
.Case("ppx", true)
.Case("ndd", true)
.Case("ccmp", true)
.Case("nf", true)
.Case("cf", true)
.Case("zu", true)
.Default(false);
}
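// Editor's illustration (not part of this change): the names accepted here
// are what the target attribute takes. A sketch, assuming avx2 and bmi2 are
// valid for the build target:
#if 0
__attribute__((target("avx2,bmi2"))) int fast_path(int x) { return x * 2; }
int generic_path(int x) { return x * 2; }
#endif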
bool X86TargetInfo::hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Case("adx", HasADX)
.Case("aes", HasAES)
.Case("amx-bf16", HasAMXBF16)
.Case("amx-complex", HasAMXCOMPLEX)
.Case("amx-fp16", HasAMXFP16)
.Case("amx-int8", HasAMXINT8)
.Case("amx-tile", HasAMXTILE)
.Case("avx", SSELevel >= AVX)
.Case("avx10.1-256", HasAVX10_1)
.Case("avx10.1-512", HasAVX10_1_512)
.Case("avx2", SSELevel >= AVX2)
.Case("avx512f", SSELevel >= AVX512F)
.Case("avx512cd", HasAVX512CD)
.Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512vnni", HasAVX512VNNI)
.Case("avx512bf16", HasAVX512BF16)
.Case("avx512fp16", HasAVX512FP16)
.Case("avx512dq", HasAVX512DQ)
.Case("avx512bitalg", HasAVX512BITALG)
.Case("avx512bw", HasAVX512BW)
.Case("avx512vl", HasAVX512VL)
.Case("avx512vbmi", HasAVX512VBMI)
.Case("avx512vbmi2", HasAVX512VBMI2)
.Case("avx512ifma", HasAVX512IFMA)
.Case("avx512vp2intersect", HasAVX512VP2INTERSECT)
.Case("avxifma", HasAVXIFMA)
.Case("avxneconvert", HasAVXNECONVERT)
.Case("avxvnni", HasAVXVNNI)
.Case("avxvnniint16", HasAVXVNNIINT16)
.Case("avxvnniint8", HasAVXVNNIINT8)
.Case("bmi", HasBMI)
.Case("bmi2", HasBMI2)
.Case("cldemote", HasCLDEMOTE)
.Case("clflushopt", HasCLFLUSHOPT)
.Case("clwb", HasCLWB)
.Case("clzero", HasCLZERO)
.Case("cmpccxadd", HasCMPCCXADD)
.Case("crc32", HasCRC32)
.Case("cx8", HasCX8)
.Case("cx16", HasCX16)
.Case("enqcmd", HasENQCMD)
.Case("evex512", HasEVEX512)
.Case("f16c", HasF16C)
.Case("fma", HasFMA)
.Case("fma4", XOPLevel >= FMA4)
.Case("fsgsbase", HasFSGSBASE)
.Case("fxsr", HasFXSR)
.Case("gfni", HasGFNI)
.Case("hreset", HasHRESET)
.Case("invpcid", HasINVPCID)
.Case("kl", HasKL)
.Case("widekl", HasWIDEKL)
.Case("lwp", HasLWP)
.Case("lzcnt", HasLZCNT)
.Case("mmx", HasMMX)
.Case("movbe", HasMOVBE)
.Case("movdiri", HasMOVDIRI)
.Case("movdir64b", HasMOVDIR64B)
.Case("mwaitx", HasMWAITX)
.Case("pclmul", HasPCLMUL)
.Case("pconfig", HasPCONFIG)
.Case("pku", HasPKU)
.Case("popcnt", HasPOPCNT)
.Case("prefetchi", HasPREFETCHI)
.Case("prfchw", HasPRFCHW)
.Case("ptwrite", HasPTWRITE)
.Case("raoint", HasRAOINT)
.Case("rdpid", HasRDPID)
.Case("rdpru", HasRDPRU)
.Case("rdrnd", HasRDRND)
.Case("rdseed", HasRDSEED)
.Case("retpoline-external-thunk", HasRetpolineExternalThunk)
.Case("rtm", HasRTM)
.Case("sahf", HasLAHFSAHF)
.Case("serialize", HasSERIALIZE)
.Case("sgx", HasSGX)
.Case("sha", HasSHA)
.Case("sha512", HasSHA512)
.Case("shstk", HasSHSTK)
.Case("sm3", HasSM3)
.Case("sm4", HasSM4)
.Case("sse", SSELevel >= SSE1)
.Case("sse2", SSELevel >= SSE2)
.Case("sse3", SSELevel >= SSE3)
.Case("ssse3", SSELevel >= SSSE3)
.Case("sse4.1", SSELevel >= SSE41)
.Case("sse4.2", SSELevel >= SSE42)
.Case("sse4a", XOPLevel >= SSE4A)
.Case("tbm", HasTBM)
.Case("tsxldtrk", HasTSXLDTRK)
.Case("uintr", HasUINTR)
.Case("usermsr", HasUSERMSR)
.Case("vaes", HasVAES)
.Case("vpclmulqdq", HasVPCLMULQDQ)
.Case("wbnoinvd", HasWBNOINVD)
.Case("waitpkg", HasWAITPKG)
.Case("x86", true)
.Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
.Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
.Case("x87", HasX87)
.Case("xop", XOPLevel >= XOP)
.Case("xsave", HasXSAVE)
.Case("xsavec", HasXSAVEC)
.Case("xsaves", HasXSAVES)
.Case("xsaveopt", HasXSAVEOPT)
.Case("fullbf16", HasFullBFloat16)
.Case("egpr", HasEGPR)
.Case("push2pop2", HasPush2Pop2)
.Case("ppx", HasPPX)
.Case("ndd", HasNDD)
.Case("ccmp", HasCCMP)
.Case("nf", HasNF)
.Case("cf", HasCF)
.Case("zu", HasZU)
.Case("branch-hint", HasBranchHint)
.Default(false);
}
// We can't use a generic validation scheme for the features accepted here
// versus subtarget features accepted in the target attribute because the
// bitfield structure that's initialized in the runtime only supports the
// below currently rather than the full range of subtarget features. (See
// X86TargetInfo::hasFeature for a somewhat comprehensive list).
bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
return llvm::StringSwitch<bool>(FeatureStr)
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) .Case(STR, true)
#define X86_MICROARCH_LEVEL(ENUM, STR, PRIORITY) .Case(STR, true)
#include "llvm/TargetParser/X86TargetParser.def"
.Default(false);
}
static llvm::X86::ProcessorFeatures getFeature(StringRef Name) {
return llvm::StringSwitch<llvm::X86::ProcessorFeatures>(Name)
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \
.Case(STR, llvm::X86::FEATURE_##ENUM)
#include "llvm/TargetParser/X86TargetParser.def"
;
// Note, this function should only be used after ensuring the value is
// correct, so it asserts if the value is out of range.
}
unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const {
// Valid CPUs have a 'key feature'; each CPU is given a priority that
// compares just better than the priority of its key feature.
using namespace llvm::X86;
CPUKind Kind = parseArchX86(Name);
if (Kind != CK_None) {
ProcessorFeatures KeyFeature = getKeyFeature(Kind);
return (getFeaturePriority(KeyFeature) << 1) + 1;
}
// Otherwise Name is a feature: get its priority and shift it left one bit
// so that we have sufficient room for the CPUs (above).
return getFeaturePriority(getFeature(Name)) << 1;
}
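// Editor's illustration (not part of this change): with the shift scheme
// above, a CPU keyed on some feature of priority P gets (P << 1) + 1 while
// the bare feature gets P << 1, so CPU-named variants outrank plain feature
// variants during multiversion resolution. A hedged target_clones example,
// assuming both variants are valid for the build target:
#if 0
__attribute__((target_clones("avx2", "default")))
int hot(int x) { return x + 1; }
#endif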
bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const {
return llvm::X86::validateCPUSpecificCPUDispatch(Name);
}
char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const {
return llvm::X86::getCPUDispatchMangling(Name);
}
void X86TargetInfo::getCPUSpecificCPUDispatchFeatures(
StringRef Name, llvm::SmallVectorImpl<StringRef> &Features) const {
SmallVector<StringRef, 32> TargetCPUFeatures;
llvm::X86::getFeaturesForCPU(Name, TargetCPUFeatures, true);
for (auto &F : TargetCPUFeatures)
Features.push_back(F);
}
// We can't use a generic validation scheme for the cpus accepted here
// versus subtarget cpus accepted in the target attribute because the
// variables initialized by the runtime only support the below currently
// rather than the full range of cpus.
bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const {
return llvm::StringSwitch<bool>(FeatureStr)
#define X86_VENDOR(ENUM, STRING) .Case(STRING, true)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true)
#define X86_CPU_TYPE(ENUM, STR) .Case(STR, true)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true)
#define X86_CPU_SUBTYPE(ENUM, STR) .Case(STR, true)
#include "llvm/TargetParser/X86TargetParser.def"
.Default(false);
}
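// Editor's illustration (not part of this change): the names validated here
// feed __builtin_cpu_is(); which CPU names are available depends on the
// compiler version.
#if 0
int main() {
  return __builtin_cpu_is("znver4"); // nonzero only on a Zen 4 CPU
}
#endif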
static unsigned matchAsmCCConstraint(const char *Name) {
auto RV = llvm::StringSwitch<unsigned>(Name)
.Case("@cca", 4)
.Case("@ccae", 5)
.Case("@ccb", 4)
.Case("@ccbe", 5)
.Case("@ccc", 4)
.Case("@cce", 4)
.Case("@ccz", 4)
.Case("@ccg", 4)
.Case("@ccge", 5)
.Case("@ccl", 4)
.Case("@ccle", 5)
.Case("@ccna", 5)
.Case("@ccnae", 6)
.Case("@ccnb", 5)
.Case("@ccnbe", 6)
.Case("@ccnc", 5)
.Case("@ccne", 5)
.Case("@ccnz", 5)
.Case("@ccng", 5)
.Case("@ccnge", 6)
.Case("@ccnl", 5)
.Case("@ccnle", 6)
.Case("@ccno", 5)
.Case("@ccnp", 5)
.Case("@ccns", 5)
.Case("@cco", 4)
.Case("@ccp", 4)
.Case("@ccs", 4)
.Default(0);
return RV;
}
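// Editor's illustration (not part of this change): the "@cc<cond>" names
// matched above are GCC-style flag output constraints (advertised via the
// __GCC_ASM_FLAG_OUTPUTS__ macro defined earlier in this file). A minimal
// usage sketch:
#if 0
int is_below(unsigned a, unsigned b) {
  int below;
  // CF is set by the compare iff a < b (unsigned); "=@ccb" captures it.
  __asm__("cmpl %2, %1" : "=@ccb"(below) : "r"(a), "r"(b));
  return below;
}
#endif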
bool X86TargetInfo::validateAsmConstraint(
const char *&Name, TargetInfo::ConstraintInfo &Info) const {
switch (*Name) {
default:
return false;
// Constant constraints.
case 'e': // 32-bit signed integer constant for use with sign-extending x86_64
// instructions.
case 'Z': // 32-bit unsigned integer constant for use with zero-extending
// x86_64 instructions.
case 's':
Info.setRequiresImmediate();
return true;
case 'I':
Info.setRequiresImmediate(0, 31);
return true;
case 'J':
Info.setRequiresImmediate(0, 63);
return true;
case 'K':
Info.setRequiresImmediate(-128, 127);
return true;
case 'L':
Info.setRequiresImmediate({int(0xff), int(0xffff), int(0xffffffff)});
return true;
case 'M':
Info.setRequiresImmediate(0, 3);
return true;
case 'N':
Info.setRequiresImmediate(0, 255);
return true;
case 'O':
Info.setRequiresImmediate(0, 127);
return true;
case 'W':
switch (*++Name) {
default:
return false;
case 's':
Info.setAllowsRegister();
return true;
}
// Register constraints.
case 'Y': // 'Y' is the first character for several 2-character constraints.
// Shift the pointer to the second character of the constraint.
Name++;
switch (*Name) {
default:
return false;
case 'z': // First SSE register.
case '2':
case 't': // Any SSE register, when SSE2 is enabled.
case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled.
case 'm': // Any MMX register, when inter-unit moves enabled.
case 'k': // AVX512 arch mask registers: k1-k7.
Info.setAllowsRegister();
return true;
}
case 'f': // Any x87 floating point stack register.
// Constraint 'f' cannot be used for output operands.
if (Info.ConstraintStr[0] == '=')
return false;
Info.setAllowsRegister();
return true;
case 'a': // eax.
case 'b': // ebx.
case 'c': // ecx.
case 'd': // edx.
case 'S': // esi.
case 'D': // edi.
case 'A': // edx:eax.
case 't': // Top of floating point stack.
case 'u': // Second from top of floating point stack.
case 'q': // Any register accessible as [r]l: a, b, c, and d.
case 'y': // Any MMX register.
case 'v': // Any {X,Y,Z}MM register (Arch & context dependent)
case 'x': // Any SSE register.
case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0
// for intermediate k reg operations).
case 'Q': // Any register accessible as [r]h: a, b, c, and d.
case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
case 'l': // "Index" registers: any general register that can be used as an
// index in a base+index memory access.
Info.setAllowsRegister();
return true;
// Floating point constant constraints.
case 'C': // SSE floating point constant.
case 'G': // x87 floating point constant.
return true;
case 'j':
Name++;
switch (*Name) {
default:
return false;
case 'r':
Info.setAllowsRegister();
return true;
case 'R':
Info.setAllowsRegister();
return true;
}
case '@':
// CC condition changes.
if (auto Len = matchAsmCCConstraint(Name)) {
Name += Len - 1;
Info.setAllowsRegister();
return true;
}
return false;
}
}
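// Editor's illustration (not part of this change): one of the immediate
// ranges validated above in action. 'I' requires a constant in [0, 31],
// which matches a 32-bit rotate count:
#if 0
unsigned rotl5(unsigned x) {
  __asm__("roll %1, %0" : "+r"(x) : "I"(5)); // 5 is within 'I' = [0, 31]
  return x;
}
#endif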
// Below is based on the following information:
// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
// | Processor Name | Cache Line Size (Bytes) | Source |
// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
// | i386 | 64 | https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf |
// | i486 | 16 | "four doublewords" (doubleword = 32 bits, 4 x 32 bits = 128 bits = 16 bytes) https://en.wikichip.org/w/images/d/d3/i486_MICROPROCESSOR_HARDWARE_REFERENCE_MANUAL_%281990%29.pdf and http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.126.4216&rep=rep1&type=pdf (page 29) |
// | i586/Pentium MMX | 32 | https://www.7-cpu.com/cpu/P-MMX.html |
// | i686/Pentium Pro | 32 | https://www.7-cpu.com/cpu/P6.html |
// | Netburst/Pentium4 | 64 | https://www.7-cpu.com/cpu/P4-180.html |
// | Atom | 64 | https://www.7-cpu.com/cpu/Atom.html |
// | Westmere | 64 | https://en.wikichip.org/wiki/intel/microarchitectures/sandy_bridge_(client) "Cache Architecture" |
// | Sandy Bridge | 64 | https://en.wikipedia.org/wiki/Sandy_Bridge and https://www.7-cpu.com/cpu/SandyBridge.html |
// | Ivy Bridge | 64 | https://blog.stuffedcow.net/2013/01/ivb-cache-replacement/ and https://www.7-cpu.com/cpu/IvyBridge.html |
// | Haswell | 64 | https://www.7-cpu.com/cpu/Haswell.html |
// | Broadwell | 64 | https://www.7-cpu.com/cpu/Broadwell.html |
// | Skylake (including skylake-avx512) | 64 | https://www.nas.nasa.gov/hecc/support/kb/skylake-processors_550.html "Cache Hierarchy" |
// | Cascade Lake | 64 | https://www.nas.nasa.gov/hecc/support/kb/cascade-lake-processors_579.html "Cache Hierarchy" |
// | Skylake | 64 | https://en.wikichip.org/wiki/intel/microarchitectures/kaby_lake "Memory Hierarchy" |
// | Ice Lake | 64 | https://www.7-cpu.com/cpu/Ice_Lake.html |
// | Knights Landing | 64 | https://software.intel.com/en-us/articles/intel-xeon-phi-processor-7200-family-memory-management-optimizations "The Intel® Xeon Phi™ Processor Architecture" |
// | Knights Mill | 64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache " |
// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
using namespace llvm::X86;
switch (CPU) {
// i386
case CK_i386:
// i486
case CK_i486:
case CK_WinChipC6:
case CK_WinChip2:
case CK_C3:
// Lakemont
case CK_Lakemont:
return 16;
// i586
case CK_i586:
case CK_Pentium:
case CK_PentiumMMX:
// i686
case CK_PentiumPro:
case CK_i686:
case CK_Pentium2:
case CK_Pentium3:
case CK_PentiumM:
case CK_C3_2:
// K6
case CK_K6:
case CK_K6_2:
case CK_K6_3:
// Geode
case CK_Geode:
return 32;
// Netburst
case CK_Pentium4:
case CK_Prescott:
case CK_Nocona:
// Atom
case CK_Bonnell:
case CK_Silvermont:
case CK_Goldmont:
case CK_GoldmontPlus:
case CK_Tremont:
case CK_Gracemont:
case CK_Westmere:
case CK_SandyBridge:
case CK_IvyBridge:
case CK_Haswell:
case CK_Broadwell:
case CK_SkylakeClient:
case CK_SkylakeServer:
case CK_Cascadelake:
case CK_Nehalem:
case CK_Cooperlake:
case CK_Cannonlake:
case CK_Tigerlake:
case CK_SapphireRapids:
case CK_IcelakeClient:
case CK_Rocketlake:
case CK_IcelakeServer:
case CK_Alderlake:
case CK_Raptorlake:
case CK_Meteorlake:
case CK_Arrowlake:
case CK_ArrowlakeS:
case CK_Lunarlake:
case CK_Pantherlake:
case CK_Sierraforest:
case CK_Grandridge:
case CK_Graniterapids:
case CK_GraniterapidsD:
case CK_Emeraldrapids:
case CK_Clearwaterforest:
case CK_KNL:
case CK_KNM:
// K7
case CK_Athlon:
case CK_AthlonXP:
// K8
case CK_K8:
case CK_K8SSE3:
case CK_AMDFAM10:
// Bobcat
case CK_BTVER1:
case CK_BTVER2:
// Bulldozer
case CK_BDVER1:
case CK_BDVER2:
case CK_BDVER3:
case CK_BDVER4:
// Zen
case CK_ZNVER1:
case CK_ZNVER2:
case CK_ZNVER3:
case CK_ZNVER4:
+ case CK_ZNVER5:
// Deprecated
case CK_x86_64:
case CK_x86_64_v2:
case CK_x86_64_v3:
case CK_x86_64_v4:
case CK_Yonah:
case CK_Penryn:
case CK_Core2:
return 64;
// The following currently have unknown cache line sizes (but they are probably all 64):
// Core
case CK_None:
return std::nullopt;
}
llvm_unreachable("Unknown CPU kind");
}
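// Editor's illustration (not part of this change): cache line size is the
// kind of value consumers use for false-sharing avoidance. A generic sketch
// that assumes the 64-byte answer returned for modern CPUs above:
#if 0
#include <atomic>
#include <cstddef>

constexpr std::size_t kCacheLine = 64; // assumption: modern x86

// alignas pads the struct so adjacent counters never share a cache line.
struct alignas(kCacheLine) PaddedCounter {
  std::atomic<long> value{0};
};
#endif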
bool X86TargetInfo::validateOutputSize(const llvm::StringMap<bool> &FeatureMap,
StringRef Constraint,
unsigned Size) const {
// Strip off constraint modifiers.
Constraint = Constraint.ltrim("=+&");
return validateOperandSize(FeatureMap, Constraint, Size);
}
bool X86TargetInfo::validateInputSize(const llvm::StringMap<bool> &FeatureMap,
StringRef Constraint,
unsigned Size) const {
return validateOperandSize(FeatureMap, Constraint, Size);
}
bool X86TargetInfo::validateOperandSize(const llvm::StringMap<bool> &FeatureMap,
StringRef Constraint,
unsigned Size) const {
switch (Constraint[0]) {
default:
break;
case 'k':
// Registers k0-k7 (AVX512) have a size limit of 64 bits.
case 'y':
return Size <= 64;
case 'f':
case 't':
case 'u':
return Size <= 128;
case 'Y':
// 'Y' is the first character for several 2-character constraints.
switch (Constraint[1]) {
default:
return false;
case 'm':
// 'Ym' is synonymous with 'y'.
case 'k':
return Size <= 64;
case 'z':
// XMM0/YMM0/ZMM0
if (hasFeatureEnabled(FeatureMap, "avx512f") &&
hasFeatureEnabled(FeatureMap, "evex512"))
// ZMM0 can be used if target supports AVX512F and EVEX512 is set.
return Size <= 512U;
else if (hasFeatureEnabled(FeatureMap, "avx"))
// YMM0 can be used if target supports AVX.
return Size <= 256U;
else if (hasFeatureEnabled(FeatureMap, "sse"))
return Size <= 128U;
return false;
case 'i':
case 't':
case '2':
// 'Yi','Yt','Y2' are synonymous with 'x' when SSE2 is enabled.
if (SSELevel < SSE2)
return false;
break;
}
break;
case 'v':
case 'x':
if (hasFeatureEnabled(FeatureMap, "avx512f") &&
hasFeatureEnabled(FeatureMap, "evex512"))
// 512-bit zmm registers can be used if target supports AVX512F and
// EVEX512 is set.
return Size <= 512U;
else if (hasFeatureEnabled(FeatureMap, "avx"))
// 256-bit ymm registers can be used if target supports AVX.
return Size <= 256U;
return Size <= 128U;
}
return true;
}
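// Editor's illustration (not part of this change): these size checks are
// what reject, e.g., a 512-bit operand in an "x" constraint without
// AVX-512. The accepted 256-bit case, assuming the TU is built with -mavx:
#if 0
typedef float v8sf __attribute__((vector_size(32))); // 256 bits

v8sf addps256(v8sf a, v8sf b) {
  __asm__("vaddps %2, %1, %0" : "=x"(a) : "x"(a), "x"(b));
  return a;
}
#endif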
std::string X86TargetInfo::convertConstraint(const char *&Constraint) const {
switch (*Constraint) {
case '@':
if (auto Len = matchAsmCCConstraint(Constraint)) {
std::string Converted = "{" + std::string(Constraint, Len) + "}";
Constraint += Len - 1;
return Converted;
}
return std::string(1, *Constraint);
case 'a':
return std::string("{ax}");
case 'b':
return std::string("{bx}");
case 'c':
return std::string("{cx}");
case 'd':
return std::string("{dx}");
case 'S':
return std::string("{si}");
case 'D':
return std::string("{di}");
case 'p': // Keep 'p' constraint (address).
return std::string("p");
case 't': // top of floating point stack.
return std::string("{st}");
case 'u': // second from top of floating point stack.
return std::string("{st(1)}"); // second from top of floating point stack.
case 'W':
assert(Constraint[1] == 's');
return '^' + std::string(Constraint++, 2);
case 'Y':
switch (Constraint[1]) {
default:
// Break from the inner switch and fall through (copy the single
// char); parsing continues after the current constraint has been
// copied into the return string.
break;
case 'k':
case 'm':
case 'i':
case 't':
case 'z':
case '2':
// "^" hints llvm that this is a 2 letter constraint.
// "Constraint++" is used to promote the string iterator
// to the next constraint.
return std::string("^") + std::string(Constraint++, 2);
}
[[fallthrough]];
case 'j':
switch (Constraint[1]) {
default:
// Break from the inner switch and fall through (copy the single
// char); parsing continues after the current constraint has been
// copied into the return string.
break;
case 'r':
case 'R':
// "^" hints llvm that this is a 2 letter constraint.
// "Constraint++" is used to promote the string iterator
// to the next constraint.
return std::string("^") + std::string(Constraint++, 2);
}
[[fallthrough]];
default:
return std::string(1, *Constraint);
}
}
void X86TargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
bool Only64Bit = getTriple().getArch() != llvm::Triple::x86;
llvm::X86::fillValidCPUArchList(Values, Only64Bit);
}
void X86TargetInfo::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const {
llvm::X86::fillValidTuneCPUList(Values);
}
ArrayRef<const char *> X86TargetInfo::getGCCRegNames() const {
return llvm::ArrayRef(GCCRegNames);
}
ArrayRef<TargetInfo::AddlRegName> X86TargetInfo::getGCCAddlRegNames() const {
return llvm::ArrayRef(AddlRegNames);
}
ArrayRef<Builtin::Info> X86_32TargetInfo::getTargetBuiltins() const {
return llvm::ArrayRef(BuiltinInfoX86, clang::X86::LastX86CommonBuiltin -
Builtin::FirstTSBuiltin + 1);
}
ArrayRef<Builtin::Info> X86_64TargetInfo::getTargetBuiltins() const {
return llvm::ArrayRef(BuiltinInfoX86,
X86::LastTSBuiltin - Builtin::FirstTSBuiltin);
}
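// Editor's illustration (not part of this change): the two accessors above
// hand out slices of one table, so the 64-bit-only builtins must follow the
// common ones. A sketch of that ordering invariant:
#if 0
#include "clang/Basic/TargetBuiltins.h"
static_assert(clang::X86::LastX86CommonBuiltin < clang::X86::LastTSBuiltin,
              "64-bit-only builtins follow the common ones in the table");
#endif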
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
index 6e69e84a2344..d7ebffa8c5e4 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGCall.cpp
@@ -1,6051 +1,6027 @@
//===--- CGCall.cpp - Encapsulate calling convention details --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// These classes wrap the information about a call or function
// definition used to handle ABI compliance.
//
//===----------------------------------------------------------------------===//
#include "CGCall.h"
#include "ABIInfo.h"
#include "ABIInfoImpl.h"
#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils/Local.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
/***/
unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
switch (CC) {
default: return llvm::CallingConv::C;
case CC_X86StdCall: return llvm::CallingConv::X86_StdCall;
case CC_X86FastCall: return llvm::CallingConv::X86_FastCall;
case CC_X86RegCall: return llvm::CallingConv::X86_RegCall;
case CC_X86ThisCall: return llvm::CallingConv::X86_ThisCall;
case CC_Win64: return llvm::CallingConv::Win64;
case CC_X86_64SysV: return llvm::CallingConv::X86_64_SysV;
case CC_AAPCS: return llvm::CallingConv::ARM_AAPCS;
case CC_AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP;
case CC_IntelOclBicc: return llvm::CallingConv::Intel_OCL_BI;
// TODO: Add support for __pascal to LLVM.
case CC_X86Pascal: return llvm::CallingConv::C;
// TODO: Add support for __vectorcall to LLVM.
case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall;
case CC_AArch64SVEPCS: return llvm::CallingConv::AArch64_SVE_VectorCall;
case CC_AMDGPUKernelCall: return llvm::CallingConv::AMDGPU_KERNEL;
case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC;
case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
case CC_PreserveAll: return llvm::CallingConv::PreserveAll;
case CC_Swift: return llvm::CallingConv::Swift;
case CC_SwiftAsync: return llvm::CallingConv::SwiftTail;
case CC_M68kRTD: return llvm::CallingConv::M68k_RTD;
case CC_PreserveNone: return llvm::CallingConv::PreserveNone;
// clang-format off
case CC_RISCVVectorCall: return llvm::CallingConv::RISCV_VectorCall;
// clang-format on
}
}
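// Editor's illustration (not part of this change): source-level spellings
// that reach this mapping (attribute availability depends on the target):
#if 0
void __attribute__((fastcall)) f1(int);   // -> llvm::CallingConv::X86_FastCall
void __attribute__((vectorcall)) f2(int); // -> X86_VectorCall
void __attribute__((preserve_most)) f3(); // -> PreserveMost
#endif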
/// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR
/// qualification. Either or both of RD and MD may be null. A null RD indicates
/// that there is no meaningful 'this' type, and a null MD can occur when
/// calling a method pointer.
CanQualType CodeGenTypes::DeriveThisType(const CXXRecordDecl *RD,
const CXXMethodDecl *MD) {
QualType RecTy;
if (RD)
RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal();
else
RecTy = Context.VoidTy;
if (MD)
RecTy = Context.getAddrSpaceQualType(RecTy, MD->getMethodQualifiers().getAddressSpace());
return Context.getPointerType(CanQualType::CreateUnsafe(RecTy));
}
/// Returns the canonical formal type of the given C++ method.
static CanQual<FunctionProtoType> GetFormalType(const CXXMethodDecl *MD) {
return MD->getType()->getCanonicalTypeUnqualified()
.getAs<FunctionProtoType>();
}
/// Returns the "extra-canonicalized" return type, which discards
/// qualifiers on the return type. Codegen doesn't care about them,
/// and it makes ABI code a little easier to be able to assume that
/// all parameter and return types are top-level unqualified.
static CanQualType GetReturnType(QualType RetTy) {
return RetTy->getCanonicalTypeUnqualified().getUnqualifiedType();
}
/// Arrange the argument and result information for a value of the given
/// unprototyped freestanding function type.
const CGFunctionInfo &
CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
// When translating an unprototyped function type, always use a
// variadic type.
return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(),
FnInfoOpts::None, std::nullopt,
FTNP->getExtInfo(), {}, RequiredArgs(0));
}
static void addExtParameterInfosForCall(
llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &paramInfos,
const FunctionProtoType *proto,
unsigned prefixArgs,
unsigned totalArgs) {
assert(proto->hasExtParameterInfos());
assert(paramInfos.size() <= prefixArgs);
assert(proto->getNumParams() + prefixArgs <= totalArgs);
paramInfos.reserve(totalArgs);
// Add default infos for any prefix args that don't already have infos.
paramInfos.resize(prefixArgs);
// Add infos for the prototype.
for (const auto &ParamInfo : proto->getExtParameterInfos()) {
paramInfos.push_back(ParamInfo);
// The implicit size argument synthesized for pass_object_size gets a
// default-constructed (empty) parameter info.
if (ParamInfo.hasPassObjectSize())
paramInfos.emplace_back();
}
assert(paramInfos.size() <= totalArgs &&
"Did we forget to insert pass_object_size args?");
// Add default infos for the variadic and/or suffix arguments.
paramInfos.resize(totalArgs);
}
/// Adds the formal parameters in FPT to the given prefix. If any parameter in
/// FPT has pass_object_size attrs, then we'll add parameters for those, too.
static void appendParameterTypes(const CodeGenTypes &CGT,
SmallVectorImpl<CanQualType> &prefix,
SmallVectorImpl<FunctionProtoType::ExtParameterInfo> &paramInfos,
CanQual<FunctionProtoType> FPT) {
// Fast path: don't touch param info if we don't need to.
if (!FPT->hasExtParameterInfos()) {
assert(paramInfos.empty() &&
"We have paramInfos, but the prototype doesn't?");
prefix.append(FPT->param_type_begin(), FPT->param_type_end());
return;
}
unsigned PrefixSize = prefix.size();
// In the vast majority of cases, we'll have precisely FPT->getNumParams()
// parameters; the only thing that can change this is the presence of
// pass_object_size. So, we preallocate for the common case.
prefix.reserve(prefix.size() + FPT->getNumParams());
auto ExtInfos = FPT->getExtParameterInfos();
assert(ExtInfos.size() == FPT->getNumParams());
for (unsigned I = 0, E = FPT->getNumParams(); I != E; ++I) {
prefix.push_back(FPT->getParamType(I));
if (ExtInfos[I].hasPassObjectSize())
prefix.push_back(CGT.getContext().getSizeType());
}
addExtParameterInfosForCall(paramInfos, FPT.getTypePtr(), PrefixSize,
prefix.size());
}
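// Illustrative example of the expansion above: a prototype such as
//   void fill(void *buf __attribute__((pass_object_size(0))));
// contributes two entries to 'prefix' -- the 'void *' parameter followed by an
// implicit 'size_t' carrying the object size -- so the lowered function takes
// one more argument than the source-level prototype declares.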
/// Arrange the LLVM function layout for a value of the given function
/// type, on top of any implicit parameters already stored.
static const CGFunctionInfo &
arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod,
SmallVectorImpl<CanQualType> &prefix,
CanQual<FunctionProtoType> FTP) {
SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
RequiredArgs Required = RequiredArgs::forPrototypePlus(FTP, prefix.size());
// FIXME: Kill copy.
appendParameterTypes(CGT, prefix, paramInfos, FTP);
CanQualType resultType = FTP->getReturnType().getUnqualifiedType();
FnInfoOpts opts =
instanceMethod ? FnInfoOpts::IsInstanceMethod : FnInfoOpts::None;
return CGT.arrangeLLVMFunctionInfo(resultType, opts, prefix,
FTP->getExtInfo(), paramInfos, Required);
}
/// Arrange the argument and result information for a value of the
/// given freestanding function type.
const CGFunctionInfo &
CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) {
SmallVector<CanQualType, 16> argTypes;
return ::arrangeLLVMFunctionInfo(*this, /*instanceMethod=*/false, argTypes,
FTP);
}
static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
bool IsWindows) {
// Set the appropriate calling convention for the Function.
if (D->hasAttr<StdCallAttr>())
return CC_X86StdCall;
if (D->hasAttr<FastCallAttr>())
return CC_X86FastCall;
if (D->hasAttr<RegCallAttr>())
return CC_X86RegCall;
if (D->hasAttr<ThisCallAttr>())
return CC_X86ThisCall;
if (D->hasAttr<VectorCallAttr>())
return CC_X86VectorCall;
if (D->hasAttr<PascalAttr>())
return CC_X86Pascal;
if (PcsAttr *PCS = D->getAttr<PcsAttr>())
return (PCS->getPCS() == PcsAttr::AAPCS ? CC_AAPCS : CC_AAPCS_VFP);
if (D->hasAttr<AArch64VectorPcsAttr>())
return CC_AArch64VectorCall;
if (D->hasAttr<AArch64SVEPcsAttr>())
return CC_AArch64SVEPCS;
if (D->hasAttr<AMDGPUKernelCallAttr>())
return CC_AMDGPUKernelCall;
if (D->hasAttr<IntelOclBiccAttr>())
return CC_IntelOclBicc;
if (D->hasAttr<MSABIAttr>())
return IsWindows ? CC_C : CC_Win64;
if (D->hasAttr<SysVABIAttr>())
return IsWindows ? CC_X86_64SysV : CC_C;
if (D->hasAttr<PreserveMostAttr>())
return CC_PreserveMost;
if (D->hasAttr<PreserveAllAttr>())
return CC_PreserveAll;
if (D->hasAttr<M68kRTDAttr>())
return CC_M68kRTD;
if (D->hasAttr<PreserveNoneAttr>())
return CC_PreserveNone;
if (D->hasAttr<RISCVVectorCCAttr>())
return CC_RISCVVectorCall;
return CC_C;
}
/// Arrange the argument and result information for a call to an
/// unknown C++ non-static member function of the given abstract type.
/// (A null RD means we don't have any meaningful "this" argument type,
/// so fall back to a generic pointer type).
/// The member function must be an ordinary function, i.e. not a
/// constructor or destructor.
const CGFunctionInfo &
CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
const FunctionProtoType *FTP,
const CXXMethodDecl *MD) {
SmallVector<CanQualType, 16> argTypes;
// Add the 'this' pointer.
argTypes.push_back(DeriveThisType(RD, MD));
return ::arrangeLLVMFunctionInfo(
*this, /*instanceMethod=*/true, argTypes,
FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>());
}
/// Set calling convention for CUDA/HIP kernel.
static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule &CGM,
const FunctionDecl *FD) {
if (FD->hasAttr<CUDAGlobalAttr>()) {
const FunctionType *FT = FTy->getAs<FunctionType>();
CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT);
FTy = FT->getCanonicalTypeUnqualified();
}
}
/// Arrange the argument and result information for a declaration or
/// definition of the given C++ non-static member function. The
/// member function must be an ordinary function, i.e. not a
/// constructor or destructor.
const CGFunctionInfo &
CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
assert(!isa<CXXConstructorDecl>(MD) && "wrong method for constructors!");
assert(!isa<CXXDestructorDecl>(MD) && "wrong method for destructors!");
CanQualType FT = GetFormalType(MD).getAs<Type>();
setCUDAKernelCallingConvention(FT, CGM, MD);
auto prototype = FT.getAs<FunctionProtoType>();
if (MD->isImplicitObjectMemberFunction()) {
// The abstract case is perfectly fine.
const CXXRecordDecl *ThisType = TheCXXABI.getThisArgumentTypeForMethod(MD);
return arrangeCXXMethodType(ThisType, prototype.getTypePtr(), MD);
}
return arrangeFreeFunctionType(prototype);
}
bool CodeGenTypes::inheritingCtorHasParams(
const InheritedConstructor &Inherited, CXXCtorType Type) {
// Parameters are unnecessary if we're constructing a base class subobject
// and the inherited constructor lives in a virtual base.
return Type == Ctor_Complete ||
!Inherited.getShadowDecl()->constructsVirtualBase() ||
!Target.getCXXABI().hasConstructorVariants();
}
const CGFunctionInfo &
CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) {
auto *MD = cast<CXXMethodDecl>(GD.getDecl());
SmallVector<CanQualType, 16> argTypes;
SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
const CXXRecordDecl *ThisType = TheCXXABI.getThisArgumentTypeForMethod(GD);
argTypes.push_back(DeriveThisType(ThisType, MD));
bool PassParams = true;
if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
// A base class inheriting constructor doesn't get forwarded arguments
// needed to construct a virtual base (or base class thereof).
if (auto Inherited = CD->getInheritedConstructor())
PassParams = inheritingCtorHasParams(Inherited, GD.getCtorType());
}
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
// Add the formal parameters.
if (PassParams)
appendParameterTypes(*this, argTypes, paramInfos, FTP);
CGCXXABI::AddedStructorArgCounts AddedArgs =
TheCXXABI.buildStructorSignature(GD, argTypes);
if (!paramInfos.empty()) {
// Note: prefix args are inserted after the first param (the 'this' arg).
if (AddedArgs.Prefix)
paramInfos.insert(paramInfos.begin() + 1, AddedArgs.Prefix,
FunctionProtoType::ExtParameterInfo{});
if (AddedArgs.Suffix)
paramInfos.append(AddedArgs.Suffix,
FunctionProtoType::ExtParameterInfo{});
}
RequiredArgs required =
(PassParams && MD->isVariadic() ? RequiredArgs(argTypes.size())
: RequiredArgs::All);
FunctionType::ExtInfo extInfo = FTP->getExtInfo();
CanQualType resultType = TheCXXABI.HasThisReturn(GD)
? argTypes.front()
: TheCXXABI.hasMostDerivedReturn(GD)
? CGM.getContext().VoidPtrTy
: Context.VoidTy;
return arrangeLLVMFunctionInfo(resultType, FnInfoOpts::IsInstanceMethod,
argTypes, extInfo, paramInfos, required);
}
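// Context for the return-type selection above (illustrative): e.g. under the
// ARM C++ ABI, constructors and non-deleting destructors return 'this'
// (HasThisReturn), while the Microsoft ABI's deleting destructor returns a
// pointer to the object (hasMostDerivedReturn); everything else returns void.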
static SmallVector<CanQualType, 16>
getArgTypesForCall(ASTContext &ctx, const CallArgList &args) {
SmallVector<CanQualType, 16> argTypes;
for (auto &arg : args)
argTypes.push_back(ctx.getCanonicalParamType(arg.Ty));
return argTypes;
}
static SmallVector<CanQualType, 16>
getArgTypesForDeclaration(ASTContext &ctx, const FunctionArgList &args) {
SmallVector<CanQualType, 16> argTypes;
for (auto &arg : args)
argTypes.push_back(ctx.getCanonicalParamType(arg->getType()));
return argTypes;
}
static llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16>
getExtParameterInfosForCall(const FunctionProtoType *proto,
unsigned prefixArgs, unsigned totalArgs) {
llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> result;
if (proto->hasExtParameterInfos()) {
addExtParameterInfosForCall(result, proto, prefixArgs, totalArgs);
}
return result;
}
/// Arrange a call to a C++ constructor, passing the given arguments.
///
/// ExtraPrefixArgs is the number of ABI-specific args passed after the `this`
/// parameter.
/// ExtraSuffixArgs is the number of ABI-specific args passed at the end of
/// args.
/// PassProtoArgs indicates whether `args` has args for the parameters in the
/// given CXXConstructorDecl.
const CGFunctionInfo &
CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
const CXXConstructorDecl *D,
CXXCtorType CtorKind,
unsigned ExtraPrefixArgs,
unsigned ExtraSuffixArgs,
bool PassProtoArgs) {
// FIXME: Kill copy.
SmallVector<CanQualType, 16> ArgTypes;
for (const auto &Arg : args)
ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
// +1 for implicit this, which should always be args[0].
unsigned TotalPrefixArgs = 1 + ExtraPrefixArgs;
CanQual<FunctionProtoType> FPT = GetFormalType(D);
RequiredArgs Required = PassProtoArgs
? RequiredArgs::forPrototypePlus(
FPT, TotalPrefixArgs + ExtraSuffixArgs)
: RequiredArgs::All;
GlobalDecl GD(D, CtorKind);
CanQualType ResultType = TheCXXABI.HasThisReturn(GD)
? ArgTypes.front()
: TheCXXABI.hasMostDerivedReturn(GD)
? CGM.getContext().VoidPtrTy
: Context.VoidTy;
FunctionType::ExtInfo Info = FPT->getExtInfo();
llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> ParamInfos;
// If the prototype args are elided, we should only have ABI-specific args,
// which never have param info.
if (PassProtoArgs && FPT->hasExtParameterInfos()) {
// ABI-specific suffix arguments are treated the same as variadic arguments.
addExtParameterInfosForCall(ParamInfos, FPT.getTypePtr(), TotalPrefixArgs,
ArgTypes.size());
}
return arrangeLLVMFunctionInfo(ResultType, FnInfoOpts::IsInstanceMethod,
ArgTypes, Info, ParamInfos, Required);
}
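// Illustrative use of the extra-arg counts above (ABI-dependent): under the
// Itanium C++ ABI, a base-object constructor of a class with virtual bases
// receives a VTT pointer right after 'this' (an ExtraPrefixArg), while the
// Microsoft ABI adds an implicit 'is most derived' flag for classes with
// virtual bases.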
/// Arrange the argument and result information for the declaration or
/// definition of the given function.
const CGFunctionInfo &
CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
if (MD->isImplicitObjectMemberFunction())
return arrangeCXXMethodDeclaration(MD);
CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
assert(isa<FunctionType>(FTy));
setCUDAKernelCallingConvention(FTy, CGM, FD);
// When declaring a function without a prototype, always use a
// non-variadic type.
if (CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>()) {
return arrangeLLVMFunctionInfo(noProto->getReturnType(), FnInfoOpts::None,
std::nullopt, noProto->getExtInfo(), {},
RequiredArgs::All);
}
return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>());
}
/// Arrange the argument and result information for the declaration or
/// definition of an Objective-C method.
const CGFunctionInfo &
CodeGenTypes::arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD) {
// It happens that this is the same as a call with no optional
// arguments, except also using the formal 'self' type.
return arrangeObjCMessageSendSignature(MD, MD->getSelfDecl()->getType());
}
/// Arrange the argument and result information for the function type
/// through which to perform a send to the given Objective-C method,
/// using the given receiver type. The receiver type is not always
/// the 'self' type of the method or even an Objective-C pointer type.
/// This is *not* the right method for actually performing such a
/// message send, due to the possibility of optional arguments.
const CGFunctionInfo &
CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
QualType receiverType) {
SmallVector<CanQualType, 16> argTys;
SmallVector<FunctionProtoType::ExtParameterInfo, 4> extParamInfos(
MD->isDirectMethod() ? 1 : 2);
argTys.push_back(Context.getCanonicalParamType(receiverType));
if (!MD->isDirectMethod())
argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType()));
// FIXME: Kill copy?
for (const auto *I : MD->parameters()) {
argTys.push_back(Context.getCanonicalParamType(I->getType()));
auto extParamInfo = FunctionProtoType::ExtParameterInfo().withIsNoEscape(
I->hasAttr<NoEscapeAttr>());
extParamInfos.push_back(extParamInfo);
}
FunctionType::ExtInfo einfo;
bool IsWindows = getContext().getTargetInfo().getTriple().isOSWindows();
einfo = einfo.withCallingConv(getCallingConventionForDecl(MD, IsWindows));
if (getContext().getLangOpts().ObjCAutoRefCount &&
MD->hasAttr<NSReturnsRetainedAttr>())
einfo = einfo.withProducesResult(true);
RequiredArgs required =
(MD->isVariadic() ? RequiredArgs(argTys.size()) : RequiredArgs::All);
return arrangeLLVMFunctionInfo(GetReturnType(MD->getReturnType()),
FnInfoOpts::None, argTys, einfo, extParamInfos,
required);
}
const CGFunctionInfo &
CodeGenTypes::arrangeUnprototypedObjCMessageSend(QualType returnType,
const CallArgList &args) {
auto argTypes = getArgTypesForCall(Context, args);
FunctionType::ExtInfo einfo;
return arrangeLLVMFunctionInfo(GetReturnType(returnType), FnInfoOpts::None,
argTypes, einfo, {}, RequiredArgs::All);
}
const CGFunctionInfo &
CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) {
// FIXME: Do we need to handle ObjCMethodDecl?
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
if (isa<CXXConstructorDecl>(GD.getDecl()) ||
isa<CXXDestructorDecl>(GD.getDecl()))
return arrangeCXXStructorDeclaration(GD);
return arrangeFunctionDeclaration(FD);
}
/// Arrange a thunk that takes 'this' as the first parameter followed by
/// varargs. Return void, regardless of the actual return type.
/// The body of the thunk will end in a musttail call to a function of the
/// correct type, and the caller will bitcast the function to the correct
/// prototype.
const CGFunctionInfo &
CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) {
assert(MD->isVirtual() && "only methods have thunks");
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
CanQualType ArgTys[] = {DeriveThisType(MD->getParent(), MD)};
return arrangeLLVMFunctionInfo(Context.VoidTy, FnInfoOpts::None, ArgTys,
FTP->getExtInfo(), {}, RequiredArgs(1));
}
const CGFunctionInfo &
CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD,
CXXCtorType CT) {
assert(CT == Ctor_CopyingClosure || CT == Ctor_DefaultClosure);
CanQual<FunctionProtoType> FTP = GetFormalType(CD);
SmallVector<CanQualType, 2> ArgTys;
const CXXRecordDecl *RD = CD->getParent();
ArgTys.push_back(DeriveThisType(RD, CD));
if (CT == Ctor_CopyingClosure)
ArgTys.push_back(*FTP->param_type_begin());
if (RD->getNumVBases() > 0)
ArgTys.push_back(Context.IntTy);
CallingConv CC = Context.getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true);
return arrangeLLVMFunctionInfo(Context.VoidTy, FnInfoOpts::IsInstanceMethod,
ArgTys, FunctionType::ExtInfo(CC), {},
RequiredArgs::All);
}
/// Arrange a call as unto a free function, except possibly with an
/// additional number of formal parameters considered required.
static const CGFunctionInfo &
arrangeFreeFunctionLikeCall(CodeGenTypes &CGT,
CodeGenModule &CGM,
const CallArgList &args,
const FunctionType *fnType,
unsigned numExtraRequiredArgs,
bool chainCall) {
assert(args.size() >= numExtraRequiredArgs);
llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
// In most cases, there are no optional arguments.
RequiredArgs required = RequiredArgs::All;
// If we have a variadic prototype, the required arguments are the
// extra prefix plus the arguments in the prototype.
if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) {
if (proto->isVariadic())
required = RequiredArgs::forPrototypePlus(proto, numExtraRequiredArgs);
if (proto->hasExtParameterInfos())
addExtParameterInfosForCall(paramInfos, proto, numExtraRequiredArgs,
args.size());
// If we don't have a prototype at all, but we're supposed to
// explicitly use the variadic convention for unprototyped calls,
// treat all of the arguments as required but preserve the nominal
// possibility of variadics.
} else if (CGM.getTargetCodeGenInfo()
.isNoProtoCallVariadic(args,
cast<FunctionNoProtoType>(fnType))) {
required = RequiredArgs(args.size());
}
// FIXME: Kill copy.
SmallVector<CanQualType, 16> argTypes;
for (const auto &arg : args)
argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty));
FnInfoOpts opts = chainCall ? FnInfoOpts::IsChainCall : FnInfoOpts::None;
return CGT.arrangeLLVMFunctionInfo(GetReturnType(fnType->getReturnType()),
opts, argTypes, fnType->getExtInfo(),
paramInfos, required);
}
/// Figure out the rules for calling a function with the given formal
/// type using the given arguments. The arguments are necessary
/// because the function might be unprototyped, in which case it's
/// target-dependent in crazy ways.
const CGFunctionInfo &
CodeGenTypes::arrangeFreeFunctionCall(const CallArgList &args,
const FunctionType *fnType,
bool chainCall) {
return arrangeFreeFunctionLikeCall(*this, CGM, args, fnType,
chainCall ? 1 : 0, chainCall);
}
/// A block function is essentially a free function with an
/// extra implicit argument.
const CGFunctionInfo &
CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args,
const FunctionType *fnType) {
return arrangeFreeFunctionLikeCall(*this, CGM, args, fnType, 1,
/*chainCall=*/false);
}
const CGFunctionInfo &
CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto,
const FunctionArgList &params) {
auto paramInfos = getExtParameterInfosForCall(proto, 1, params.size());
auto argTypes = getArgTypesForDeclaration(Context, params);
return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()),
FnInfoOpts::None, argTypes,
proto->getExtInfo(), paramInfos,
RequiredArgs::forPrototypePlus(proto, 1));
}
const CGFunctionInfo &
CodeGenTypes::arrangeBuiltinFunctionCall(QualType resultType,
const CallArgList &args) {
// FIXME: Kill copy.
SmallVector<CanQualType, 16> argTypes;
for (const auto &Arg : args)
argTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
argTypes, FunctionType::ExtInfo(),
/*paramInfos=*/{}, RequiredArgs::All);
}
const CGFunctionInfo &
CodeGenTypes::arrangeBuiltinFunctionDeclaration(QualType resultType,
const FunctionArgList &args) {
auto argTypes = getArgTypesForDeclaration(Context, args);
return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
argTypes, FunctionType::ExtInfo(), {},
RequiredArgs::All);
}
const CGFunctionInfo &
CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType,
ArrayRef<CanQualType> argTypes) {
return arrangeLLVMFunctionInfo(resultType, FnInfoOpts::None, argTypes,
FunctionType::ExtInfo(), {},
RequiredArgs::All);
}
/// Arrange a call to a C++ method, passing the given arguments.
///
/// numPrefixArgs is the number of ABI-specific prefix arguments we have. It
/// does not count `this`.
const CGFunctionInfo &
CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args,
const FunctionProtoType *proto,
RequiredArgs required,
unsigned numPrefixArgs) {
assert(numPrefixArgs + 1 <= args.size() &&
"Emitting a call with less args than the required prefix?");
// Add one to account for `this`. It's a bit awkward here, but we don't count
// `this` in similar places elsewhere.
auto paramInfos =
getExtParameterInfosForCall(proto, numPrefixArgs + 1, args.size());
// FIXME: Kill copy.
auto argTypes = getArgTypesForCall(Context, args);
FunctionType::ExtInfo info = proto->getExtInfo();
return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()),
FnInfoOpts::IsInstanceMethod, argTypes, info,
paramInfos, required);
}
const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() {
return arrangeLLVMFunctionInfo(getContext().VoidTy, FnInfoOpts::None,
std::nullopt, FunctionType::ExtInfo(), {},
RequiredArgs::All);
}
const CGFunctionInfo &
CodeGenTypes::arrangeCall(const CGFunctionInfo &signature,
const CallArgList &args) {
assert(signature.arg_size() <= args.size());
if (signature.arg_size() == args.size())
return signature;
SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
auto sigParamInfos = signature.getExtParameterInfos();
if (!sigParamInfos.empty()) {
paramInfos.append(sigParamInfos.begin(), sigParamInfos.end());
paramInfos.resize(args.size());
}
auto argTypes = getArgTypesForCall(Context, args);
assert(signature.getRequiredArgs().allowsOptionalArgs());
FnInfoOpts opts = FnInfoOpts::None;
if (signature.isInstanceMethod())
opts |= FnInfoOpts::IsInstanceMethod;
if (signature.isChainCall())
opts |= FnInfoOpts::IsChainCall;
if (signature.isDelegateCall())
opts |= FnInfoOpts::IsDelegateCall;
return arrangeLLVMFunctionInfo(signature.getReturnType(), opts, argTypes,
signature.getExtInfo(), paramInfos,
signature.getRequiredArgs());
}
namespace clang {
namespace CodeGen {
void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI);
}
}
/// Arrange the argument and result information for an abstract value
/// of a given function type. This is the method which all of the
/// above functions ultimately defer to.
const CGFunctionInfo &CodeGenTypes::arrangeLLVMFunctionInfo(
CanQualType resultType, FnInfoOpts opts, ArrayRef<CanQualType> argTypes,
FunctionType::ExtInfo info,
ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
RequiredArgs required) {
assert(llvm::all_of(argTypes,
[](CanQualType T) { return T.isCanonicalAsParam(); }));
// Lookup or create unique function info.
llvm::FoldingSetNodeID ID;
bool isInstanceMethod =
(opts & FnInfoOpts::IsInstanceMethod) == FnInfoOpts::IsInstanceMethod;
bool isChainCall =
(opts & FnInfoOpts::IsChainCall) == FnInfoOpts::IsChainCall;
bool isDelegateCall =
(opts & FnInfoOpts::IsDelegateCall) == FnInfoOpts::IsDelegateCall;
CGFunctionInfo::Profile(ID, isInstanceMethod, isChainCall, isDelegateCall,
info, paramInfos, required, resultType, argTypes);
void *insertPos = nullptr;
CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos);
if (FI)
return *FI;
unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
// Construct the function info. We co-allocate the ArgInfos.
FI = CGFunctionInfo::create(CC, isInstanceMethod, isChainCall, isDelegateCall,
info, paramInfos, resultType, argTypes, required);
FunctionInfos.InsertNode(FI, insertPos);
bool inserted = FunctionsBeingProcessed.insert(FI).second;
(void)inserted;
assert(inserted && "Recursively being processed?");
// Compute ABI information.
if (CC == llvm::CallingConv::SPIR_KERNEL) {
// Force target-independent argument handling for the host-visible
// kernel functions.
computeSPIRKernelABIInfo(CGM, *FI);
} else if (info.getCC() == CC_Swift || info.getCC() == CC_SwiftAsync) {
swiftcall::computeABIInfo(CGM, *FI);
} else {
getABIInfo().computeInfo(*FI);
}
// Loop over all of the computed argument and return value info. If any of
// them are direct or extend without a specified coerce type, specify the
// default now.
ABIArgInfo &retInfo = FI->getReturnInfo();
if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == nullptr)
retInfo.setCoerceToType(ConvertType(FI->getReturnType()));
for (auto &I : FI->arguments())
if (I.info.canHaveCoerceToType() && I.info.getCoerceToType() == nullptr)
I.info.setCoerceToType(ConvertType(I.type));
bool erased = FunctionsBeingProcessed.erase(FI); (void)erased;
assert(erased && "Not in set?");
return *FI;
}
CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, bool instanceMethod,
bool chainCall, bool delegateCall,
const FunctionType::ExtInfo &info,
ArrayRef<ExtParameterInfo> paramInfos,
CanQualType resultType,
ArrayRef<CanQualType> argTypes,
RequiredArgs required) {
assert(paramInfos.empty() || paramInfos.size() == argTypes.size());
assert(!required.allowsOptionalArgs() ||
required.getNumRequiredArgs() <= argTypes.size());
void *buffer =
operator new(totalSizeToAlloc<ArgInfo, ExtParameterInfo>(
argTypes.size() + 1, paramInfos.size()));
CGFunctionInfo *FI = new(buffer) CGFunctionInfo();
FI->CallingConvention = llvmCC;
FI->EffectiveCallingConvention = llvmCC;
FI->ASTCallingConvention = info.getCC();
FI->InstanceMethod = instanceMethod;
FI->ChainCall = chainCall;
FI->DelegateCall = delegateCall;
FI->CmseNSCall = info.getCmseNSCall();
FI->NoReturn = info.getNoReturn();
FI->ReturnsRetained = info.getProducesResult();
FI->NoCallerSavedRegs = info.getNoCallerSavedRegs();
FI->NoCfCheck = info.getNoCfCheck();
FI->Required = required;
FI->HasRegParm = info.getHasRegParm();
FI->RegParm = info.getRegParm();
FI->ArgStruct = nullptr;
FI->ArgStructAlign = 0;
FI->NumArgs = argTypes.size();
FI->HasExtParameterInfos = !paramInfos.empty();
FI->getArgsBuffer()[0].type = resultType;
FI->MaxVectorWidth = 0;
for (unsigned i = 0, e = argTypes.size(); i != e; ++i)
FI->getArgsBuffer()[i + 1].type = argTypes[i];
for (unsigned i = 0, e = paramInfos.size(); i != e; ++i)
FI->getExtParameterInfosBuffer()[i] = paramInfos[i];
return FI;
}
/***/
namespace {
// ABIArgInfo::Expand implementation.
// Specifies the way a QualType passed as ABIArgInfo::Expand is expanded.
struct TypeExpansion {
enum TypeExpansionKind {
// Elements of constant arrays are expanded recursively.
TEK_ConstantArray,
// Record fields are expanded recursively (but if record is a union, only
// the field with the largest size is expanded).
TEK_Record,
// For complex types, real and imaginary parts are expanded recursively.
TEK_Complex,
// All other types are not expandable.
TEK_None
};
const TypeExpansionKind Kind;
TypeExpansion(TypeExpansionKind K) : Kind(K) {}
virtual ~TypeExpansion() {}
};
struct ConstantArrayExpansion : TypeExpansion {
QualType EltTy;
uint64_t NumElts;
ConstantArrayExpansion(QualType EltTy, uint64_t NumElts)
: TypeExpansion(TEK_ConstantArray), EltTy(EltTy), NumElts(NumElts) {}
static bool classof(const TypeExpansion *TE) {
return TE->Kind == TEK_ConstantArray;
}
};
struct RecordExpansion : TypeExpansion {
SmallVector<const CXXBaseSpecifier *, 1> Bases;
SmallVector<const FieldDecl *, 1> Fields;
RecordExpansion(SmallVector<const CXXBaseSpecifier *, 1> &&Bases,
SmallVector<const FieldDecl *, 1> &&Fields)
: TypeExpansion(TEK_Record), Bases(std::move(Bases)),
Fields(std::move(Fields)) {}
static bool classof(const TypeExpansion *TE) {
return TE->Kind == TEK_Record;
}
};
struct ComplexExpansion : TypeExpansion {
QualType EltTy;
ComplexExpansion(QualType EltTy) : TypeExpansion(TEK_Complex), EltTy(EltTy) {}
static bool classof(const TypeExpansion *TE) {
return TE->Kind == TEK_Complex;
}
};
struct NoExpansion : TypeExpansion {
NoExpansion() : TypeExpansion(TEK_None) {}
static bool classof(const TypeExpansion *TE) {
return TE->Kind == TEK_None;
}
};
} // namespace
static std::unique_ptr<TypeExpansion>
getTypeExpansion(QualType Ty, const ASTContext &Context) {
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
return std::make_unique<ConstantArrayExpansion>(AT->getElementType(),
AT->getZExtSize());
}
if (const RecordType *RT = Ty->getAs<RecordType>()) {
SmallVector<const CXXBaseSpecifier *, 1> Bases;
SmallVector<const FieldDecl *, 1> Fields;
const RecordDecl *RD = RT->getDecl();
assert(!RD->hasFlexibleArrayMember() &&
"Cannot expand structure with flexible array.");
if (RD->isUnion()) {
// Unions can occur here only in degenerate cases: all the fields are the
// same after flattening. Thus we have to use the "largest" field.
const FieldDecl *LargestFD = nullptr;
CharUnits UnionSize = CharUnits::Zero();
for (const auto *FD : RD->fields()) {
if (FD->isZeroLengthBitField(Context))
continue;
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
CharUnits FieldSize = Context.getTypeSizeInChars(FD->getType());
if (UnionSize < FieldSize) {
UnionSize = FieldSize;
LargestFD = FD;
}
}
if (LargestFD)
Fields.push_back(LargestFD);
} else {
if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
assert(!CXXRD->isDynamicClass() &&
"cannot expand vtable pointers in dynamic classes");
llvm::append_range(Bases, llvm::make_pointer_range(CXXRD->bases()));
}
for (const auto *FD : RD->fields()) {
if (FD->isZeroLengthBitField(Context))
continue;
assert(!FD->isBitField() &&
"Cannot expand structure with bit-field members.");
Fields.push_back(FD);
}
}
return std::make_unique<RecordExpansion>(std::move(Bases),
std::move(Fields));
}
if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
return std::make_unique<ComplexExpansion>(CT->getElementType());
}
return std::make_unique<NoExpansion>();
}
static int getExpansionSize(QualType Ty, const ASTContext &Context) {
auto Exp = getTypeExpansion(Ty, Context);
if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
return CAExp->NumElts * getExpansionSize(CAExp->EltTy, Context);
}
if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
int Res = 0;
for (auto BS : RExp->Bases)
Res += getExpansionSize(BS->getType(), Context);
for (auto FD : RExp->Fields)
Res += getExpansionSize(FD->getType(), Context);
return Res;
}
if (isa<ComplexExpansion>(Exp.get()))
return 2;
assert(isa<NoExpansion>(Exp.get()));
return 1;
}
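// Worked example (illustrative): for
//   struct P { int xy[2]; _Complex float c; };
// the constant array expands to 2 ints and the complex member to 2 floats
// (real + imaginary), so getExpansionSize reports 4 IR arguments.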
void
CodeGenTypes::getExpandedTypes(QualType Ty,
SmallVectorImpl<llvm::Type *>::iterator &TI) {
auto Exp = getTypeExpansion(Ty, Context);
if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
for (int i = 0, n = CAExp->NumElts; i < n; i++) {
getExpandedTypes(CAExp->EltTy, TI);
}
} else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
for (auto BS : RExp->Bases)
getExpandedTypes(BS->getType(), TI);
for (auto FD : RExp->Fields)
getExpandedTypes(FD->getType(), TI);
} else if (auto CExp = dyn_cast<ComplexExpansion>(Exp.get())) {
llvm::Type *EltTy = ConvertType(CExp->EltTy);
*TI++ = EltTy;
*TI++ = EltTy;
} else {
assert(isa<NoExpansion>(Exp.get()));
*TI++ = ConvertType(Ty);
}
}
static void forConstantArrayExpansion(CodeGenFunction &CGF,
ConstantArrayExpansion *CAE,
Address BaseAddr,
llvm::function_ref<void(Address)> Fn) {
for (int i = 0, n = CAE->NumElts; i < n; i++) {
Address EltAddr = CGF.Builder.CreateConstGEP2_32(BaseAddr, 0, i);
Fn(EltAddr);
}
}
void CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV,
llvm::Function::arg_iterator &AI) {
assert(LV.isSimple() &&
"Unexpected non-simple lvalue during struct expansion.");
auto Exp = getTypeExpansion(Ty, getContext());
if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
forConstantArrayExpansion(
*this, CAExp, LV.getAddress(), [&](Address EltAddr) {
LValue LV = MakeAddrLValue(EltAddr, CAExp->EltTy);
ExpandTypeFromArgs(CAExp->EltTy, LV, AI);
});
} else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
Address This = LV.getAddress();
for (const CXXBaseSpecifier *BS : RExp->Bases) {
// Perform a single step derived-to-base conversion.
Address Base =
GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1,
/*NullCheckValue=*/false, SourceLocation());
LValue SubLV = MakeAddrLValue(Base, BS->getType());
// Recurse onto bases.
ExpandTypeFromArgs(BS->getType(), SubLV, AI);
}
for (auto FD : RExp->Fields) {
// FIXME: What are the right qualifiers here?
LValue SubLV = EmitLValueForFieldInitialization(LV, FD);
ExpandTypeFromArgs(FD->getType(), SubLV, AI);
}
} else if (isa<ComplexExpansion>(Exp.get())) {
auto realValue = &*AI++;
auto imagValue = &*AI++;
EmitStoreOfComplex(ComplexPairTy(realValue, imagValue), LV, /*init*/ true);
} else {
// Call EmitStoreOfScalar except when the lvalue is a bitfield to emit a
// primitive store.
assert(isa<NoExpansion>(Exp.get()));
llvm::Value *Arg = &*AI++;
if (LV.isBitField()) {
EmitStoreThroughLValue(RValue::get(Arg), LV);
} else {
// TODO: currently there are some places that are inconsistent in what LLVM
// pointer type they use (see D118744). Once clang uses opaque pointers
// all LLVM pointer types will be the same and we can remove this check.
if (Arg->getType()->isPointerTy()) {
Address Addr = LV.getAddress();
Arg = Builder.CreateBitCast(Arg, Addr.getElementType());
}
EmitStoreOfScalar(Arg, LV);
}
}
}
void CodeGenFunction::ExpandTypeToArgs(
QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy,
SmallVectorImpl<llvm::Value *> &IRCallArgs, unsigned &IRCallArgPos) {
auto Exp = getTypeExpansion(Ty, getContext());
if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress()
: Arg.getKnownRValue().getAggregateAddress();
forConstantArrayExpansion(
*this, CAExp, Addr, [&](Address EltAddr) {
CallArg EltArg = CallArg(
convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()),
CAExp->EltTy);
ExpandTypeToArgs(CAExp->EltTy, EltArg, IRFuncTy, IRCallArgs,
IRCallArgPos);
});
} else if (auto RExp = dyn_cast<RecordExpansion>(Exp.get())) {
Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress()
: Arg.getKnownRValue().getAggregateAddress();
for (const CXXBaseSpecifier *BS : RExp->Bases) {
// Perform a single step derived-to-base conversion.
Address Base =
GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1,
/*NullCheckValue=*/false, SourceLocation());
CallArg BaseArg = CallArg(RValue::getAggregate(Base), BS->getType());
// Recurse onto bases.
ExpandTypeToArgs(BS->getType(), BaseArg, IRFuncTy, IRCallArgs,
IRCallArgPos);
}
LValue LV = MakeAddrLValue(This, Ty);
for (auto FD : RExp->Fields) {
CallArg FldArg =
CallArg(EmitRValueForField(LV, FD, SourceLocation()), FD->getType());
ExpandTypeToArgs(FD->getType(), FldArg, IRFuncTy, IRCallArgs,
IRCallArgPos);
}
} else if (isa<ComplexExpansion>(Exp.get())) {
ComplexPairTy CV = Arg.getKnownRValue().getComplexVal();
IRCallArgs[IRCallArgPos++] = CV.first;
IRCallArgs[IRCallArgPos++] = CV.second;
} else {
assert(isa<NoExpansion>(Exp.get()));
auto RV = Arg.getKnownRValue();
assert(RV.isScalar() &&
"Unexpected non-scalar rvalue during struct expansion.");
// Insert a bitcast as needed.
llvm::Value *V = RV.getScalarVal();
if (IRCallArgPos < IRFuncTy->getNumParams() &&
V->getType() != IRFuncTy->getParamType(IRCallArgPos))
V = Builder.CreateBitCast(V, IRFuncTy->getParamType(IRCallArgPos));
IRCallArgs[IRCallArgPos++] = V;
}
}
/// Create a temporary allocation for the purposes of coercion.
static RawAddress CreateTempAllocaForCoercion(CodeGenFunction &CGF,
llvm::Type *Ty,
CharUnits MinAlign,
const Twine &Name = "tmp") {
// Don't use an alignment that's worse than what LLVM would prefer.
auto PrefAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(Ty);
CharUnits Align = std::max(MinAlign, CharUnits::fromQuantity(PrefAlign));
return CGF.CreateTempAlloca(Ty, Align, Name + ".coerce");
}
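// E.g. (illustrative): asking for an i64 coercion temporary with MinAlign == 4
// on a target whose preferred alignment for i64 is 8 yields an alloca aligned
// to 8 -- we never use an alignment worse than what LLVM would prefer.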
/// EnterStructPointerForCoercedAccess - Given a struct pointer that we are
/// accessing some number of bytes out of it, try to gep into the struct to get
/// at its inner goodness. Dive as deep as possible without entering an element
/// with an in-memory size smaller than DstSize.
static Address
EnterStructPointerForCoercedAccess(Address SrcPtr,
llvm::StructType *SrcSTy,
uint64_t DstSize, CodeGenFunction &CGF) {
// We can't dive into a zero-element struct.
if (SrcSTy->getNumElements() == 0) return SrcPtr;
llvm::Type *FirstElt = SrcSTy->getElementType(0);
// If the first elt is at least as large as what we're looking for, or if the
// first element is the same size as the whole struct, we can enter it. The
// comparison must be made on the store size and not the alloca size. Using
// the alloca size may overstate the size of the load.
uint64_t FirstEltSize =
CGF.CGM.getDataLayout().getTypeStoreSize(FirstElt);
if (FirstEltSize < DstSize &&
FirstEltSize < CGF.CGM.getDataLayout().getTypeStoreSize(SrcSTy))
return SrcPtr;
// GEP into the first element.
SrcPtr = CGF.Builder.CreateStructGEP(SrcPtr, 0, "coerce.dive");
// If the first element is a struct, recurse.
llvm::Type *SrcTy = SrcPtr.getElementType();
if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy))
return EnterStructPointerForCoercedAccess(SrcPtr, SrcSTy, DstSize, CGF);
return SrcPtr;
}
/// CoerceIntOrPtrToIntOrPtr - Convert a value Val to the specific Ty where both
/// are either integers or pointers. This does a truncation of the value if it
/// is too large or a zero extension if it is too small.
///
/// This behaves as if the value were coerced through memory, so on big-endian
/// targets the high bits are preserved in a truncation, while little-endian
/// targets preserve the low bits.
static llvm::Value *CoerceIntOrPtrToIntOrPtr(llvm::Value *Val,
llvm::Type *Ty,
CodeGenFunction &CGF) {
if (Val->getType() == Ty)
return Val;
if (isa<llvm::PointerType>(Val->getType())) {
// If this is Pointer->Pointer avoid conversion to and from int.
if (isa<llvm::PointerType>(Ty))
return CGF.Builder.CreateBitCast(Val, Ty, "coerce.val");
// Convert the pointer to an integer so we can play with its width.
Val = CGF.Builder.CreatePtrToInt(Val, CGF.IntPtrTy, "coerce.val.pi");
}
llvm::Type *DestIntTy = Ty;
if (isa<llvm::PointerType>(DestIntTy))
DestIntTy = CGF.IntPtrTy;
if (Val->getType() != DestIntTy) {
const llvm::DataLayout &DL = CGF.CGM.getDataLayout();
if (DL.isBigEndian()) {
// Preserve the high bits on big-endian targets.
// That is what memory coercion does.
uint64_t SrcSize = DL.getTypeSizeInBits(Val->getType());
uint64_t DstSize = DL.getTypeSizeInBits(DestIntTy);
if (SrcSize > DstSize) {
Val = CGF.Builder.CreateLShr(Val, SrcSize - DstSize, "coerce.highbits");
Val = CGF.Builder.CreateTrunc(Val, DestIntTy, "coerce.val.ii");
} else {
Val = CGF.Builder.CreateZExt(Val, DestIntTy, "coerce.val.ii");
Val = CGF.Builder.CreateShl(Val, DstSize - SrcSize, "coerce.highbits");
}
} else {
// Little-endian targets preserve the low bits. No shifts required.
Val = CGF.Builder.CreateIntCast(Val, DestIntTy, false, "coerce.val.ii");
}
}
if (isa<llvm::PointerType>(Ty))
Val = CGF.Builder.CreateIntToPtr(Val, Ty, "coerce.val.ip");
return Val;
}
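// Worked example (illustrative): coercing an i64 holding 0xAABBCCDD11223344
// to i32 yields 0x11223344 on a little-endian target (a plain truncation) but
// 0xAABBCCDD on a big-endian one (lshr by 32, then trunc) -- exactly what a
// store followed by a narrower load from the same address would produce.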
/// CreateCoercedLoad - Create a load from \arg SrcPtr interpreted as
/// a pointer to an object of type \arg Ty, known to be aligned to
/// \arg SrcAlign bytes.
///
/// This safely handles the case when the src type is smaller than the
/// destination type; in this situation the values of bits not present in
/// the src are undefined.
static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
CodeGenFunction &CGF) {
llvm::Type *SrcTy = Src.getElementType();
// If SrcTy and Ty are the same, just do a load.
if (SrcTy == Ty)
return CGF.Builder.CreateLoad(Src);
llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(Ty);
if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy)) {
Src = EnterStructPointerForCoercedAccess(Src, SrcSTy,
DstSize.getFixedValue(), CGF);
SrcTy = Src.getElementType();
}
llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
// If the source and destination are integer or pointer types, just do an
// extension or truncation to the desired type.
if ((isa<llvm::IntegerType>(Ty) || isa<llvm::PointerType>(Ty)) &&
(isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy))) {
llvm::Value *Load = CGF.Builder.CreateLoad(Src);
return CoerceIntOrPtrToIntOrPtr(Load, Ty, CGF);
}
// If load is legal, just bitcast the src pointer.
if (!SrcSize.isScalable() && !DstSize.isScalable() &&
SrcSize.getFixedValue() >= DstSize.getFixedValue()) {
// Generally SrcSize is never greater than DstSize, since this means we are
// losing bits. However, this can happen in cases where the structure has
// additional padding, for example due to a user specified alignment.
//
// FIXME: Assert that we aren't truncating non-padding bits when we have
// access to that information.
Src = Src.withElementType(Ty);
return CGF.Builder.CreateLoad(Src);
}
// If coercing a fixed vector to a scalable vector for ABI compatibility, and
// the types match, use the llvm.vector.insert intrinsic to perform the
// conversion.
if (auto *ScalableDstTy = dyn_cast<llvm::ScalableVectorType>(Ty)) {
if (auto *FixedSrcTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
// If we are casting a fixed i8 vector to a scalable i1 predicate
// vector, use a vector insert and bitcast the result.
if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
FixedSrcTy->getElementType()->isIntegerTy(8)) {
ScalableDstTy = llvm::ScalableVectorType::get(
FixedSrcTy->getElementType(),
ScalableDstTy->getElementCount().getKnownMinValue() / 8);
}
if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) {
auto *Load = CGF.Builder.CreateLoad(Src);
auto *UndefVec = llvm::UndefValue::get(ScalableDstTy);
auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
llvm::Value *Result = CGF.Builder.CreateInsertVector(
ScalableDstTy, UndefVec, Load, Zero, "cast.scalable");
if (ScalableDstTy != Ty)
Result = CGF.Builder.CreateBitCast(Result, Ty);
return Result;
}
}
}
// Otherwise do coercion through memory. This is stupid, but simple.
RawAddress Tmp =
CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment(), Src.getName());
CGF.Builder.CreateMemCpy(
Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
Src.emitRawPointer(CGF), Src.getAlignment().getAsAlign(),
llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize.getKnownMinValue()));
return CGF.Builder.CreateLoad(Tmp);
}
-// Function to store a first-class aggregate into memory. We prefer to
-// store the elements rather than the aggregate to be more friendly to
-// fast-isel.
-// FIXME: Do we need to recurse here?
-void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest,
- bool DestIsVolatile) {
- // Prefer scalar stores to first-class aggregate stores.
- if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Address EltPtr = Builder.CreateStructGEP(Dest, i);
- llvm::Value *Elt = Builder.CreateExtractValue(Val, i);
- Builder.CreateStore(Elt, EltPtr, DestIsVolatile);
- }
- } else {
- Builder.CreateStore(Val, Dest, DestIsVolatile);
- }
-}
-
-/// CreateCoercedStore - Create a store to \arg DstPtr from \arg Src,
-/// where the source and destination may have different types. The
-/// destination is known to be aligned to \arg DstAlign bytes.
-///
-/// This safely handles the case when the src type is larger than the
-/// destination type; the upper bits of the src will be lost.
-static void CreateCoercedStore(llvm::Value *Src,
- Address Dst,
- bool DstIsVolatile,
- CodeGenFunction &CGF) {
- llvm::Type *SrcTy = Src->getType();
- llvm::Type *DstTy = Dst.getElementType();
- if (SrcTy == DstTy) {
- CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
- return;
- }
-
- llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy);
-
- if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) {
- Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
- SrcSize.getFixedValue(), CGF);
- DstTy = Dst.getElementType();
- }
-
- llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
- llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
- if (SrcPtrTy && DstPtrTy &&
- SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
- Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy);
- CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
+ llvm::TypeSize DstSize,
+ bool DstIsVolatile) {
+ if (!DstSize)
return;
- }
- // If the source and destination are integer or pointer types, just do an
- // extension or truncation to the desired type.
- if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
- (isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) {
- Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF);
- CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
- return;
+ llvm::Type *SrcTy = Src->getType();
+ llvm::TypeSize SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
+
+ // GEP into structs to try to make types match.
+ // FIXME: This isn't really that useful with opaque types, but it impacts a
+ // lot of regression tests.
+ if (SrcTy != Dst.getElementType()) {
+ if (llvm::StructType *DstSTy =
+ dyn_cast<llvm::StructType>(Dst.getElementType())) {
+ assert(!SrcSize.isScalable());
+ Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy,
+ SrcSize.getFixedValue(), *this);
+ }
}
- llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy);
-
- // If store is legal, just bitcast the src pointer.
- if (isa<llvm::ScalableVectorType>(SrcTy) ||
- isa<llvm::ScalableVectorType>(DstTy) ||
- SrcSize.getFixedValue() <= DstSize.getFixedValue()) {
- Dst = Dst.withElementType(SrcTy);
- CGF.EmitAggregateStore(Src, Dst, DstIsVolatile);
+ if (SrcSize.isScalable() || SrcSize <= DstSize) {
+ if (SrcTy->isIntegerTy() && Dst.getElementType()->isPointerTy() &&
+ SrcSize == CGM.getDataLayout().getTypeAllocSize(Dst.getElementType())) {
+ // If the value is supposed to be a pointer, convert it before storing it.
+ Src = CoerceIntOrPtrToIntOrPtr(Src, Dst.getElementType(), *this);
+ Builder.CreateStore(Src, Dst, DstIsVolatile);
+ } else if (llvm::StructType *STy =
+ dyn_cast<llvm::StructType>(Src->getType())) {
+ // Prefer scalar stores to first-class aggregate stores.
+ Dst = Dst.withElementType(SrcTy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Address EltPtr = Builder.CreateStructGEP(Dst, i);
+ llvm::Value *Elt = Builder.CreateExtractValue(Src, i);
+ Builder.CreateStore(Elt, EltPtr, DstIsVolatile);
+ }
+ } else {
+ Builder.CreateStore(Src, Dst.withElementType(SrcTy), DstIsVolatile);
+ }
+ } else if (SrcTy->isIntegerTy()) {
+ // If the source is a simple integer, coerce it directly.
+ llvm::Type *DstIntTy = Builder.getIntNTy(DstSize.getFixedValue() * 8);
+ Src = CoerceIntOrPtrToIntOrPtr(Src, DstIntTy, *this);
+ Builder.CreateStore(Src, Dst.withElementType(DstIntTy), DstIsVolatile);
} else {
// Otherwise do coercion through memory. This is stupid, but
// simple.
// Generally SrcSize is never greater than DstSize, since this means we are
// losing bits. However, this can happen in cases where the structure has
// additional padding, for example due to a user specified alignment.
//
// FIXME: Assert that we aren't truncating non-padding bits when we have
// access to that information.
RawAddress Tmp =
- CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
- CGF.Builder.CreateStore(Src, Tmp);
- CGF.Builder.CreateMemCpy(
- Dst.emitRawPointer(CGF), Dst.getAlignment().getAsAlign(),
- Tmp.getPointer(), Tmp.getAlignment().getAsAlign(),
- llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getFixedValue()));
+ CreateTempAllocaForCoercion(*this, SrcTy, Dst.getAlignment());
+ Builder.CreateStore(Src, Tmp);
+ Builder.CreateMemCpy(Dst.emitRawPointer(*this),
+ Dst.getAlignment().getAsAlign(), Tmp.getPointer(),
+ Tmp.getAlignment().getAsAlign(),
+ Builder.CreateTypeSize(IntPtrTy, DstSize));
}
}
static Address emitAddressAtOffset(CodeGenFunction &CGF, Address addr,
const ABIArgInfo &info) {
if (unsigned offset = info.getDirectOffset()) {
addr = addr.withElementType(CGF.Int8Ty);
addr = CGF.Builder.CreateConstInBoundsByteGEP(addr,
CharUnits::fromQuantity(offset));
addr = addr.withElementType(info.getCoerceToType());
}
return addr;
}
namespace {
/// Encapsulates information about the way function arguments from
/// CGFunctionInfo should be passed to the actual LLVM IR function.
class ClangToLLVMArgMapping {
static const unsigned InvalidIndex = ~0U;
unsigned InallocaArgNo;
unsigned SRetArgNo;
unsigned TotalIRArgs;
/// Arguments of the LLVM IR function corresponding to a single Clang argument.
struct IRArgs {
unsigned PaddingArgIndex;
// Argument is expanded to IR arguments at positions
// [FirstArgIndex, FirstArgIndex + NumberOfArgs).
unsigned FirstArgIndex;
unsigned NumberOfArgs;
IRArgs()
: PaddingArgIndex(InvalidIndex), FirstArgIndex(InvalidIndex),
NumberOfArgs(0) {}
};
SmallVector<IRArgs, 8> ArgInfo;
public:
ClangToLLVMArgMapping(const ASTContext &Context, const CGFunctionInfo &FI,
bool OnlyRequiredArgs = false)
: InallocaArgNo(InvalidIndex), SRetArgNo(InvalidIndex), TotalIRArgs(0),
ArgInfo(OnlyRequiredArgs ? FI.getNumRequiredArgs() : FI.arg_size()) {
construct(Context, FI, OnlyRequiredArgs);
}
bool hasInallocaArg() const { return InallocaArgNo != InvalidIndex; }
unsigned getInallocaArgNo() const {
assert(hasInallocaArg());
return InallocaArgNo;
}
bool hasSRetArg() const { return SRetArgNo != InvalidIndex; }
unsigned getSRetArgNo() const {
assert(hasSRetArg());
return SRetArgNo;
}
unsigned totalIRArgs() const { return TotalIRArgs; }
bool hasPaddingArg(unsigned ArgNo) const {
assert(ArgNo < ArgInfo.size());
return ArgInfo[ArgNo].PaddingArgIndex != InvalidIndex;
}
unsigned getPaddingArgNo(unsigned ArgNo) const {
assert(hasPaddingArg(ArgNo));
return ArgInfo[ArgNo].PaddingArgIndex;
}
/// Returns the index of the first IR argument corresponding to ArgNo, and
/// how many IR arguments it expands to.
std::pair<unsigned, unsigned> getIRArgs(unsigned ArgNo) const {
assert(ArgNo < ArgInfo.size());
return std::make_pair(ArgInfo[ArgNo].FirstArgIndex,
ArgInfo[ArgNo].NumberOfArgs);
}
private:
void construct(const ASTContext &Context, const CGFunctionInfo &FI,
bool OnlyRequiredArgs);
};
void ClangToLLVMArgMapping::construct(const ASTContext &Context,
const CGFunctionInfo &FI,
bool OnlyRequiredArgs) {
unsigned IRArgNo = 0;
bool SwapThisWithSRet = false;
const ABIArgInfo &RetAI = FI.getReturnInfo();
if (RetAI.getKind() == ABIArgInfo::Indirect) {
SwapThisWithSRet = RetAI.isSRetAfterThis();
SRetArgNo = SwapThisWithSRet ? 1 : IRArgNo++;
}
unsigned ArgNo = 0;
unsigned NumArgs = OnlyRequiredArgs ? FI.getNumRequiredArgs() : FI.arg_size();
for (CGFunctionInfo::const_arg_iterator I = FI.arg_begin(); ArgNo < NumArgs;
++I, ++ArgNo) {
assert(I != FI.arg_end());
QualType ArgType = I->type;
const ABIArgInfo &AI = I->info;
// Collect data about IR arguments corresponding to Clang argument ArgNo.
auto &IRArgs = ArgInfo[ArgNo];
if (AI.getPaddingType())
IRArgs.PaddingArgIndex = IRArgNo++;
switch (AI.getKind()) {
case ABIArgInfo::Extend:
case ABIArgInfo::Direct: {
// FIXME: handle sseregparm someday...
llvm::StructType *STy = dyn_cast<llvm::StructType>(AI.getCoerceToType());
if (AI.isDirect() && AI.getCanBeFlattened() && STy) {
IRArgs.NumberOfArgs = STy->getNumElements();
} else {
IRArgs.NumberOfArgs = 1;
}
break;
}
case ABIArgInfo::Indirect:
case ABIArgInfo::IndirectAliased:
IRArgs.NumberOfArgs = 1;
break;
case ABIArgInfo::Ignore:
case ABIArgInfo::InAlloca:
// ignore and inalloca don't have matching LLVM parameters.
IRArgs.NumberOfArgs = 0;
break;
case ABIArgInfo::CoerceAndExpand:
IRArgs.NumberOfArgs = AI.getCoerceAndExpandTypeSequence().size();
break;
case ABIArgInfo::Expand:
IRArgs.NumberOfArgs = getExpansionSize(ArgType, Context);
break;
}
if (IRArgs.NumberOfArgs > 0) {
IRArgs.FirstArgIndex = IRArgNo;
IRArgNo += IRArgs.NumberOfArgs;
}
// Skip over the sret parameter when it comes second. We already handled it
// above.
if (IRArgNo == 1 && SwapThisWithSRet)
IRArgNo++;
}
assert(ArgNo == ArgInfo.size());
if (FI.usesInAlloca())
InallocaArgNo = IRArgNo++;
TotalIRArgs = IRArgNo;
}
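// Illustrative mapping (assumed layout): for an instance method whose
// indirect return uses sret-after-this, IR argument 0 is 'this', argument 1
// is the sret pointer, the remaining Clang arguments start at index 2, and an
// inalloca argument, if present, is appended last.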
} // namespace
/***/
bool CodeGenModule::ReturnTypeUsesSRet(const CGFunctionInfo &FI) {
const auto &RI = FI.getReturnInfo();
return RI.isIndirect() || (RI.isInAlloca() && RI.getInAllocaSRet());
}
bool CodeGenModule::ReturnTypeHasInReg(const CGFunctionInfo &FI) {
const auto &RI = FI.getReturnInfo();
return RI.getInReg();
}
bool CodeGenModule::ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI) {
return ReturnTypeUsesSRet(FI) &&
getTargetCodeGenInfo().doesReturnSlotInterfereWithArgs();
}
bool CodeGenModule::ReturnTypeUsesFPRet(QualType ResultType) {
if (const BuiltinType *BT = ResultType->getAs<BuiltinType>()) {
switch (BT->getKind()) {
default:
return false;
case BuiltinType::Float:
return getTarget().useObjCFPRetForRealType(FloatModeKind::Float);
case BuiltinType::Double:
return getTarget().useObjCFPRetForRealType(FloatModeKind::Double);
case BuiltinType::LongDouble:
return getTarget().useObjCFPRetForRealType(FloatModeKind::LongDouble);
}
}
return false;
}
bool CodeGenModule::ReturnTypeUsesFP2Ret(QualType ResultType) {
if (const ComplexType *CT = ResultType->getAs<ComplexType>()) {
if (const BuiltinType *BT = CT->getElementType()->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::LongDouble)
return getTarget().useObjCFP2RetForComplexLongDouble();
}
}
return false;
}
llvm::FunctionType *CodeGenTypes::GetFunctionType(GlobalDecl GD) {
const CGFunctionInfo &FI = arrangeGlobalDeclaration(GD);
return GetFunctionType(FI);
}
llvm::FunctionType *
CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) {
bool Inserted = FunctionsBeingProcessed.insert(&FI).second;
(void)Inserted;
assert(Inserted && "Recursively being processed?");
llvm::Type *resultType = nullptr;
const ABIArgInfo &retAI = FI.getReturnInfo();
switch (retAI.getKind()) {
case ABIArgInfo::Expand:
case ABIArgInfo::IndirectAliased:
llvm_unreachable("Invalid ABI kind for return argument");
case ABIArgInfo::Extend:
case ABIArgInfo::Direct:
resultType = retAI.getCoerceToType();
break;
case ABIArgInfo::InAlloca:
if (retAI.getInAllocaSRet()) {
// sret things on win32 aren't void; they return the sret pointer.
QualType ret = FI.getReturnType();
unsigned addressSpace = CGM.getTypes().getTargetAddressSpace(ret);
resultType = llvm::PointerType::get(getLLVMContext(), addressSpace);
} else {
resultType = llvm::Type::getVoidTy(getLLVMContext());
}
break;
case ABIArgInfo::Indirect:
case ABIArgInfo::Ignore:
resultType = llvm::Type::getVoidTy(getLLVMContext());
break;
case ABIArgInfo::CoerceAndExpand:
resultType = retAI.getUnpaddedCoerceAndExpandType();
break;
}
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI, true);
SmallVector<llvm::Type*, 8> ArgTypes(IRFunctionArgs.totalIRArgs());
// Add type for sret argument.
if (IRFunctionArgs.hasSRetArg()) {
QualType Ret = FI.getReturnType();
unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret);
ArgTypes[IRFunctionArgs.getSRetArgNo()] =
llvm::PointerType::get(getLLVMContext(), AddressSpace);
}
// Add type for inalloca argument.
if (IRFunctionArgs.hasInallocaArg())
ArgTypes[IRFunctionArgs.getInallocaArgNo()] =
llvm::PointerType::getUnqual(getLLVMContext());
// Add in all of the required arguments.
unsigned ArgNo = 0;
CGFunctionInfo::const_arg_iterator it = FI.arg_begin(),
ie = it + FI.getNumRequiredArgs();
for (; it != ie; ++it, ++ArgNo) {
const ABIArgInfo &ArgInfo = it->info;
// Insert a padding type to ensure proper alignment.
if (IRFunctionArgs.hasPaddingArg(ArgNo))
ArgTypes[IRFunctionArgs.getPaddingArgNo(ArgNo)] =
ArgInfo.getPaddingType();
unsigned FirstIRArg, NumIRArgs;
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
switch (ArgInfo.getKind()) {
case ABIArgInfo::Ignore:
case ABIArgInfo::InAlloca:
assert(NumIRArgs == 0);
break;
case ABIArgInfo::Indirect:
assert(NumIRArgs == 1);
// Indirect arguments are always on the stack, which is the alloca addr space.
ArgTypes[FirstIRArg] = llvm::PointerType::get(
getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace());
break;
case ABIArgInfo::IndirectAliased:
assert(NumIRArgs == 1);
ArgTypes[FirstIRArg] = llvm::PointerType::get(
getLLVMContext(), ArgInfo.getIndirectAddrSpace());
break;
case ABIArgInfo::Extend:
case ABIArgInfo::Direct: {
// Fast-isel and the optimizer generally like scalar values better than
// FCAs, so we flatten them if this is safe to do for this argument.
llvm::Type *argType = ArgInfo.getCoerceToType();
llvm::StructType *st = dyn_cast<llvm::StructType>(argType);
if (st && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
assert(NumIRArgs == st->getNumElements());
for (unsigned i = 0, e = st->getNumElements(); i != e; ++i)
ArgTypes[FirstIRArg + i] = st->getElementType(i);
} else {
assert(NumIRArgs == 1);
ArgTypes[FirstIRArg] = argType;
}
break;
}
case ABIArgInfo::CoerceAndExpand: {
auto ArgTypesIter = ArgTypes.begin() + FirstIRArg;
for (auto *EltTy : ArgInfo.getCoerceAndExpandTypeSequence()) {
*ArgTypesIter++ = EltTy;
}
assert(ArgTypesIter == ArgTypes.begin() + FirstIRArg + NumIRArgs);
break;
}
case ABIArgInfo::Expand:
auto ArgTypesIter = ArgTypes.begin() + FirstIRArg;
getExpandedTypes(it->type, ArgTypesIter);
assert(ArgTypesIter == ArgTypes.begin() + FirstIRArg + NumIRArgs);
break;
}
}
bool Erased = FunctionsBeingProcessed.erase(&FI); (void)Erased;
assert(Erased && "Not in set?");
return llvm::FunctionType::get(resultType, ArgTypes, FI.isVariadic());
}
llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) {
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
if (!isFuncTypeConvertible(FPT))
return llvm::StructType::get(getLLVMContext());
return GetFunctionType(GD);
}
static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
llvm::AttrBuilder &FuncAttrs,
const FunctionProtoType *FPT) {
if (!FPT)
return;
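// A non-throwing prototype (e.g. 'void f() noexcept') lowers to 'nounwind'.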
if (!isUnresolvedExceptionSpec(FPT->getExceptionSpecType()) &&
FPT->isNothrow())
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
unsigned SMEBits = FPT->getAArch64SMEAttributes();
if (SMEBits & FunctionType::SME_PStateSMEnabledMask)
FuncAttrs.addAttribute("aarch64_pstate_sm_enabled");
if (SMEBits & FunctionType::SME_PStateSMCompatibleMask)
FuncAttrs.addAttribute("aarch64_pstate_sm_compatible");
// ZA
if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Preserves)
FuncAttrs.addAttribute("aarch64_preserves_za");
if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_In)
FuncAttrs.addAttribute("aarch64_in_za");
if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_Out)
FuncAttrs.addAttribute("aarch64_out_za");
if (FunctionType::getArmZAState(SMEBits) == FunctionType::ARM_InOut)
FuncAttrs.addAttribute("aarch64_inout_za");
// ZT0
if (FunctionType::getArmZT0State(SMEBits) == FunctionType::ARM_Preserves)
FuncAttrs.addAttribute("aarch64_preserves_zt0");
if (FunctionType::getArmZT0State(SMEBits) == FunctionType::ARM_In)
FuncAttrs.addAttribute("aarch64_in_zt0");
if (FunctionType::getArmZT0State(SMEBits) == FunctionType::ARM_Out)
FuncAttrs.addAttribute("aarch64_out_zt0");
if (FunctionType::getArmZT0State(SMEBits) == FunctionType::ARM_InOut)
FuncAttrs.addAttribute("aarch64_inout_zt0");
}
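/// Collect the assumption strings from any OpenMP 'assume' attributes on the
/// callee and attach them as one comma-joined string attribute (keyed by
/// llvm::AssumptionAttrKey).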
static void AddAttributesFromOMPAssumes(llvm::AttrBuilder &FuncAttrs,
const Decl *Callee) {
if (!Callee)
return;
SmallVector<StringRef, 4> Attrs;
for (const OMPAssumeAttr *AA : Callee->specific_attrs<OMPAssumeAttr>())
AA->getAssumption().split(Attrs, ",");
if (!Attrs.empty())
FuncAttrs.addAttribute(llvm::AssumptionAttrKey,
llvm::join(Attrs.begin(), Attrs.end(), ","));
}
bool CodeGenModule::MayDropFunctionReturn(const ASTContext &Context,
QualType ReturnType) const {
// We can't just discard the return value for a record type with a
// complex destructor or a non-trivially copyable type.
if (const RecordType *RT =
ReturnType.getCanonicalType()->getAs<RecordType>()) {
if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
return ClassDecl->hasTrivialDestructor();
}
return ReturnType.isTriviallyCopyableType(Context);
}
static bool HasStrictReturn(const CodeGenModule &Module, QualType RetTy,
const Decl *TargetDecl) {
// As-is, MSan cannot tolerate a noundef mismatch between caller and
// implementation. A mismatch is possible for e.g. indirect calls from a C
// caller into C++. Such mismatches lead to confusing false reports. To avoid
// an expensive workaround in MSan, we enforce initialization even in uncommon
// cases where it's allowed.
if (Module.getLangOpts().Sanitize.has(SanitizerKind::Memory))
return true;
// C++ explicitly makes returning undefined values UB. C's rule only applies
// to used values, so we never mark them noundef for now.
if (!Module.getLangOpts().CPlusPlus)
return false;
if (TargetDecl) {
if (const FunctionDecl *FDecl = dyn_cast<FunctionDecl>(TargetDecl)) {
if (FDecl->isExternC())
return false;
} else if (const VarDecl *VDecl = dyn_cast<VarDecl>(TargetDecl)) {
// Function pointer.
if (VDecl->isExternC())
return false;
}
}
// We don't want to be too aggressive with the return checking, unless
// it's explicit in the code opts or we're using an appropriate sanitizer.
// Try to respect what the programmer intended.
return Module.getCodeGenOpts().StrictReturn ||
!Module.MayDropFunctionReturn(Module.getContext(), RetTy) ||
Module.getLangOpts().Sanitize.has(SanitizerKind::Return);
}
/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
/// requested denormal behavior, accounting for the overriding behavior of the
/// -f32 case.
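/// For example, -fdenormal-fp-math=preserve-sign alone yields roughly
/// "denormal-fp-math"="preserve-sign,preserve-sign" and no -f32 variant.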
static void addDenormalModeAttrs(llvm::DenormalMode FPDenormalMode,
llvm::DenormalMode FP32DenormalMode,
llvm::AttrBuilder &FuncAttrs) {
if (FPDenormalMode != llvm::DenormalMode::getDefault())
FuncAttrs.addAttribute("denormal-fp-math", FPDenormalMode.str());
if (FP32DenormalMode != FPDenormalMode && FP32DenormalMode.isValid())
FuncAttrs.addAttribute("denormal-fp-math-f32", FP32DenormalMode.str());
}
/// Add default attributes to a function, which have merge semantics under
/// -mlink-builtin-bitcode and should not simply overwrite any existing
/// attributes in the linked library.
static void
addMergableDefaultFunctionAttributes(const CodeGenOptions &CodeGenOpts,
llvm::AttrBuilder &FuncAttrs) {
addDenormalModeAttrs(CodeGenOpts.FPDenormalMode, CodeGenOpts.FP32DenormalMode,
FuncAttrs);
}
static void getTrivialDefaultFunctionAttributes(
StringRef Name, bool HasOptnone, const CodeGenOptions &CodeGenOpts,
const LangOptions &LangOpts, bool AttrOnCallSite,
llvm::AttrBuilder &FuncAttrs) {
// OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
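// (OptimizeSize is 1 for -Os and 2 for -Oz, so -Oz adds minsize on top of
// optsize.)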
if (!HasOptnone) {
if (CodeGenOpts.OptimizeSize)
FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
if (CodeGenOpts.OptimizeSize == 2)
FuncAttrs.addAttribute(llvm::Attribute::MinSize);
}
if (CodeGenOpts.DisableRedZone)
FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
if (CodeGenOpts.IndirectTlsSegRefs)
FuncAttrs.addAttribute("indirect-tls-seg-refs");
if (CodeGenOpts.NoImplicitFloat)
FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
if (AttrOnCallSite) {
// Attributes that should go on the call site only.
// FIXME: Look for 'BuiltinAttr' on the function rather than re-checking
// the -fno-builtin-foo list.
if (!CodeGenOpts.SimplifyLibCalls || LangOpts.isNoBuiltinFunc(Name))
FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
if (!CodeGenOpts.TrapFuncName.empty())
FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
} else {
switch (CodeGenOpts.getFramePointer()) {
case CodeGenOptions::FramePointerKind::None:
// This is the default behavior.
break;
case CodeGenOptions::FramePointerKind::Reserved:
case CodeGenOptions::FramePointerKind::NonLeaf:
case CodeGenOptions::FramePointerKind::All:
FuncAttrs.addAttribute("frame-pointer",
CodeGenOptions::getFramePointerKindName(
CodeGenOpts.getFramePointer()));
}
if (CodeGenOpts.LessPreciseFPMAD)
FuncAttrs.addAttribute("less-precise-fpmad", "true");
if (CodeGenOpts.NullPointerIsValid)
FuncAttrs.addAttribute(llvm::Attribute::NullPointerIsValid);
if (LangOpts.getDefaultExceptionMode() == LangOptions::FPE_Ignore)
FuncAttrs.addAttribute("no-trapping-math", "true");
// TODO: Are these all needed?
// unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
if (LangOpts.NoHonorInfs)
FuncAttrs.addAttribute("no-infs-fp-math", "true");
if (LangOpts.NoHonorNaNs)
FuncAttrs.addAttribute("no-nans-fp-math", "true");
if (LangOpts.ApproxFunc)
FuncAttrs.addAttribute("approx-func-fp-math", "true");
if (LangOpts.AllowFPReassoc && LangOpts.AllowRecip &&
LangOpts.NoSignedZero && LangOpts.ApproxFunc &&
(LangOpts.getDefaultFPContractMode() ==
LangOptions::FPModeKind::FPM_Fast ||
LangOpts.getDefaultFPContractMode() ==
LangOptions::FPModeKind::FPM_FastHonorPragmas))
FuncAttrs.addAttribute("unsafe-fp-math", "true");
if (CodeGenOpts.SoftFloat)
FuncAttrs.addAttribute("use-soft-float", "true");
FuncAttrs.addAttribute("stack-protector-buffer-size",
llvm::utostr(CodeGenOpts.SSPBufferSize));
if (LangOpts.NoSignedZero)
FuncAttrs.addAttribute("no-signed-zeros-fp-math", "true");
// TODO: Reciprocal estimate codegen options should apply to instructions?
const std::vector<std::string> &Recips = CodeGenOpts.Reciprocals;
if (!Recips.empty())
FuncAttrs.addAttribute("reciprocal-estimates",
llvm::join(Recips, ","));
if (!CodeGenOpts.PreferVectorWidth.empty() &&
CodeGenOpts.PreferVectorWidth != "none")
FuncAttrs.addAttribute("prefer-vector-width",
CodeGenOpts.PreferVectorWidth);
if (CodeGenOpts.StackRealignment)
FuncAttrs.addAttribute("stackrealign");
if (CodeGenOpts.Backchain)
FuncAttrs.addAttribute("backchain");
if (CodeGenOpts.EnableSegmentedStacks)
FuncAttrs.addAttribute("split-stack");
if (CodeGenOpts.SpeculativeLoadHardening)
FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
// Add zero-call-used-regs attribute.
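// e.g. -fzero-call-used-regs=used-gpr lowers to the string attribute
// "zero-call-used-regs"="used-gpr".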
switch (CodeGenOpts.getZeroCallUsedRegs()) {
case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Skip:
FuncAttrs.removeAttribute("zero-call-used-regs");
break;
case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::UsedGPRArg:
FuncAttrs.addAttribute("zero-call-used-regs", "used-gpr-arg");
break;
case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::UsedGPR:
FuncAttrs.addAttribute("zero-call-used-regs", "used-gpr");
break;
case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::UsedArg:
FuncAttrs.addAttribute("zero-call-used-regs", "used-arg");
break;
case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Used:
FuncAttrs.addAttribute("zero-call-used-regs", "used");
break;
case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::AllGPRArg:
FuncAttrs.addAttribute("zero-call-used-regs", "all-gpr-arg");
break;
case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::AllGPR:
FuncAttrs.addAttribute("zero-call-used-regs", "all-gpr");
break;
case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::AllArg:
FuncAttrs.addAttribute("zero-call-used-regs", "all-arg");
break;
case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::All:
FuncAttrs.addAttribute("zero-call-used-regs", "all");
break;
}
}
if (LangOpts.assumeFunctionsAreConvergent()) {
// Conservatively, mark all functions and calls in CUDA and OpenCL as
// convergent (meaning, they may call an intrinsically convergent op, such
// as __syncthreads() / barrier(), and so can't have certain optimizations
// applied around them). LLVM will remove this attribute where it safely
// can.
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
}
// TODO: NoUnwind attribute should be added for other GPU modes HIP,
// OpenMP offload. AFAIK, neither of them supports exceptions in device code.
if ((LangOpts.CUDA && LangOpts.CUDAIsDevice) || LangOpts.OpenCL ||
LangOpts.SYCLIsDevice) {
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
for (StringRef Attr : CodeGenOpts.DefaultFunctionAttrs) {
StringRef Var, Value;
std::tie(Var, Value) = Attr.split('=');
FuncAttrs.addAttribute(Var, Value);
}
TargetInfo::BranchProtectionInfo BPI(LangOpts);
TargetCodeGenInfo::initBranchProtectionFnAttributes(BPI, FuncAttrs);
}
/// Merges `target-features` from \p TargetOpts and \p F, and sets the result
/// in \p FuncAttr.
/// * features from \p F are always kept
/// * a feature from \p TargetOpts is kept only if both it and its opposite
///   are absent from \p F
static void
overrideFunctionFeaturesWithTargetFeatures(llvm::AttrBuilder &FuncAttr,
const llvm::Function &F,
const TargetOptions &TargetOpts) {
auto FFeatures = F.getFnAttribute("target-features");
llvm::StringSet<> MergedNames;
SmallVector<StringRef> MergedFeatures;
MergedFeatures.reserve(TargetOpts.Features.size());
auto AddUnmergedFeatures = [&](auto &&FeatureRange) {
for (StringRef Feature : FeatureRange) {
if (Feature.empty())
continue;
assert(Feature[0] == '+' || Feature[0] == '-');
StringRef Name = Feature.drop_front(1);
bool Merged = !MergedNames.insert(Name).second;
if (!Merged)
MergedFeatures.push_back(Feature);
}
};
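// Illustrative example: if F carries "+sse4.2,-avx" and TargetOpts.Features
// is {"+avx", "+bmi"}, the sorted result is "+bmi,+sse4.2,-avx": F's entries
// win, and "+avx" is dropped because the name 'avx' was already claimed by F.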
if (FFeatures.isValid())
AddUnmergedFeatures(llvm::split(FFeatures.getValueAsString(), ','));
AddUnmergedFeatures(TargetOpts.Features);
if (!MergedFeatures.empty()) {
llvm::sort(MergedFeatures);
FuncAttr.addAttribute("target-features", llvm::join(MergedFeatures, ","));
}
}
void CodeGen::mergeDefaultFunctionDefinitionAttributes(
llvm::Function &F, const CodeGenOptions &CodeGenOpts,
const LangOptions &LangOpts, const TargetOptions &TargetOpts,
bool WillInternalize) {
llvm::AttrBuilder FuncAttrs(F.getContext());
// Here we only extract the options that are relevant compared to the version
// from GetCPUAndFeaturesAttributes.
if (!TargetOpts.CPU.empty())
FuncAttrs.addAttribute("target-cpu", TargetOpts.CPU);
if (!TargetOpts.TuneCPU.empty())
FuncAttrs.addAttribute("tune-cpu", TargetOpts.TuneCPU);
::getTrivialDefaultFunctionAttributes(F.getName(), F.hasOptNone(),
CodeGenOpts, LangOpts,
/*AttrOnCallSite=*/false, FuncAttrs);
if (!WillInternalize && F.isInterposable()) {
// Do not promote "dynamic" denormal-fp-math to this translation unit's
// setting for weak functions that won't be internalized. The user has no
// real control over how builtin bitcode is linked, so we shouldn't assume
// later copies will use a consistent mode.
F.addFnAttrs(FuncAttrs);
return;
}
llvm::AttributeMask AttrsToRemove;
llvm::DenormalMode DenormModeToMerge = F.getDenormalModeRaw();
llvm::DenormalMode DenormModeToMergeF32 = F.getDenormalModeF32Raw();
llvm::DenormalMode Merged =
CodeGenOpts.FPDenormalMode.mergeCalleeMode(DenormModeToMerge);
llvm::DenormalMode MergedF32 = CodeGenOpts.FP32DenormalMode;
if (DenormModeToMergeF32.isValid()) {
MergedF32 =
CodeGenOpts.FP32DenormalMode.mergeCalleeMode(DenormModeToMergeF32);
}
if (Merged == llvm::DenormalMode::getDefault()) {
AttrsToRemove.addAttribute("denormal-fp-math");
} else if (Merged != DenormModeToMerge) {
// Overwrite existing attribute
FuncAttrs.addAttribute("denormal-fp-math",
CodeGenOpts.FPDenormalMode.str());
}
if (MergedF32 == llvm::DenormalMode::getDefault()) {
AttrsToRemove.addAttribute("denormal-fp-math-f32");
} else if (MergedF32 != DenormModeToMergeF32) {
// Overwrite existing attribute
FuncAttrs.addAttribute("denormal-fp-math-f32",
CodeGenOpts.FP32DenormalMode.str());
}
F.removeFnAttrs(AttrsToRemove);
addDenormalModeAttrs(Merged, MergedF32, FuncAttrs);
overrideFunctionFeaturesWithTargetFeatures(FuncAttrs, F, TargetOpts);
F.addFnAttrs(FuncAttrs);
}
void CodeGenModule::getTrivialDefaultFunctionAttributes(
StringRef Name, bool HasOptnone, bool AttrOnCallSite,
llvm::AttrBuilder &FuncAttrs) {
::getTrivialDefaultFunctionAttributes(Name, HasOptnone, getCodeGenOpts(),
getLangOpts(), AttrOnCallSite,
FuncAttrs);
}
void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
bool HasOptnone,
bool AttrOnCallSite,
llvm::AttrBuilder &FuncAttrs) {
getTrivialDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite,
FuncAttrs);
// If we're just getting the default, get the default values for mergeable
// attributes.
if (!AttrOnCallSite)
addMergableDefaultFunctionAttributes(CodeGenOpts, FuncAttrs);
}
void CodeGenModule::addDefaultFunctionDefinitionAttributes(
llvm::AttrBuilder &attrs) {
getDefaultFunctionAttributes(/*function name*/ "", /*optnone*/ false,
/*for call*/ false, attrs);
GetCPUAndFeaturesAttributes(GlobalDecl(), attrs);
}
static void addNoBuiltinAttributes(llvm::AttrBuilder &FuncAttrs,
const LangOptions &LangOpts,
const NoBuiltinAttr *NBA = nullptr) {
auto AddNoBuiltinAttr = [&FuncAttrs](StringRef BuiltinName) {
SmallString<32> AttributeName;
AttributeName += "no-builtin-";
AttributeName += BuiltinName;
FuncAttrs.addAttribute(AttributeName);
};
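// e.g. a BuiltinName of "memcpy" becomes the "no-builtin-memcpy" attribute.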
// First, handle the language options passed through -fno-builtin.
if (LangOpts.NoBuiltin) {
// -fno-builtin disables them all.
FuncAttrs.addAttribute("no-builtins");
return;
}
// Then, add attributes for builtins specified through -fno-builtin-<name>.
llvm::for_each(LangOpts.NoBuiltinFuncs, AddNoBuiltinAttr);
// Now, let's check the __attribute__((no_builtin("..."))) attribute added to
// the source.
if (!NBA)
return;
// If there is a wildcard in the builtin names specified through the
// attribute, disable them all.
if (llvm::is_contained(NBA->builtinNames(), "*")) {
FuncAttrs.addAttribute("no-builtins");
return;
}
// And last, add the rest of the builtin names.
llvm::for_each(NBA->builtinNames(), AddNoBuiltinAttr);
}
static bool DetermineNoUndef(QualType QTy, CodeGenTypes &Types,
const llvm::DataLayout &DL, const ABIArgInfo &AI,
bool CheckCoerce = true) {
llvm::Type *Ty = Types.ConvertTypeForMem(QTy);
if (AI.getKind() == ABIArgInfo::Indirect ||
AI.getKind() == ABIArgInfo::IndirectAliased)
return true;
if (AI.getKind() == ABIArgInfo::Extend)
return true;
if (!DL.typeSizeEqualsStoreSize(Ty))
// TODO: This will result in a modest amount of values not marked noundef
// when they could be. We care about values that *invisibly* contain undef
// bits from the perspective of LLVM IR.
return false;
if (CheckCoerce && AI.canHaveCoerceToType()) {
llvm::Type *CoerceTy = AI.getCoerceToType();
if (llvm::TypeSize::isKnownGT(DL.getTypeSizeInBits(CoerceTy),
DL.getTypeSizeInBits(Ty)))
// If we're coercing to a type with a greater size than the canonical one,
// we're introducing new undef bits.
// Coercing to a type of smaller or equal size is ok, as we know that
// there's no internal padding (typeSizeEqualsStoreSize).
return false;
}
if (QTy->isBitIntType())
return true;
if (QTy->isReferenceType())
return true;
if (QTy->isNullPtrType())
return false;
if (QTy->isMemberPointerType())
// TODO: Some member pointers are `noundef`, but it depends on the ABI. For
// now, never mark them.
return false;
if (QTy->isScalarType()) {
if (const ComplexType *Complex = dyn_cast<ComplexType>(QTy))
return DetermineNoUndef(Complex->getElementType(), Types, DL, AI, false);
return true;
}
if (const VectorType *Vector = dyn_cast<VectorType>(QTy))
return DetermineNoUndef(Vector->getElementType(), Types, DL, AI, false);
if (const MatrixType *Matrix = dyn_cast<MatrixType>(QTy))
return DetermineNoUndef(Matrix->getElementType(), Types, DL, AI, false);
if (const ArrayType *Array = dyn_cast<ArrayType>(QTy))
return DetermineNoUndef(Array->getElementType(), Types, DL, AI, false);
// TODO: Some structs may be `noundef`, in specific situations.
return false;
}
/// Check if the argument of a function has maybe_undef attribute.
static bool IsArgumentMaybeUndef(const Decl *TargetDecl,
unsigned NumRequiredArgs, unsigned ArgNo) {
const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
if (!FD)
return false;
// Assume variadic arguments do not have maybe_undef attribute.
if (ArgNo >= NumRequiredArgs)
return false;
// Check if argument has maybe_undef attribute.
if (ArgNo < FD->getNumParams()) {
const ParmVarDecl *Param = FD->getParamDecl(ArgNo);
if (Param && Param->hasAttr<MaybeUndefAttr>())
return true;
}
return false;
}
/// Test if it's legal to apply nofpclass to the given parameter type and its
/// lowered IR type.
static bool canApplyNoFPClass(const ABIArgInfo &AI, QualType ParamType,
bool IsReturn) {
// Should only apply to FP types in the source, not ABI promoted.
if (!ParamType->hasFloatingRepresentation())
return false;
// The promoted-to IR type also needs to support nofpclass.
llvm::Type *IRTy = AI.getCoerceToType();
if (llvm::AttributeFuncs::isNoFPClassCompatibleType(IRTy))
return true;
if (llvm::StructType *ST = dyn_cast<llvm::StructType>(IRTy)) {
return !IsReturn && AI.getCanBeFlattened() &&
llvm::all_of(ST->elements(), [](llvm::Type *Ty) {
return llvm::AttributeFuncs::isNoFPClassCompatibleType(Ty);
});
}
return false;
}
/// Return the nofpclass mask that can be applied to floating-point parameters.
static llvm::FPClassTest getNoFPClassTestMask(const LangOptions &LangOpts) {
llvm::FPClassTest Mask = llvm::fcNone;
if (LangOpts.NoHonorInfs)
Mask |= llvm::fcInf;
if (LangOpts.NoHonorNaNs)
Mask |= llvm::fcNan;
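// e.g. -ffast-math enables both flags, so FP parameters and returns get a
// nan|inf nofpclass mask.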
return Mask;
}
void CodeGenModule::AdjustMemoryAttribute(StringRef Name,
CGCalleeInfo CalleeInfo,
llvm::AttributeList &Attrs) {
if (Attrs.getMemoryEffects().getModRef() == llvm::ModRefInfo::NoModRef) {
Attrs = Attrs.removeFnAttribute(getLLVMContext(), llvm::Attribute::Memory);
llvm::Attribute MemoryAttr = llvm::Attribute::getWithMemoryEffects(
getLLVMContext(), llvm::MemoryEffects::writeOnly());
Attrs = Attrs.addFnAttribute(getLLVMContext(), MemoryAttr);
}
}
/// Construct the IR attribute list of a function or call.
///
/// When adding an attribute, please consider where it should be handled:
///
/// - getDefaultFunctionAttributes is for attributes that are essentially
/// part of the global target configuration (but perhaps can be
/// overridden on a per-function basis). Adding attributes there
/// will cause them to also be set in frontends that build on Clang's
/// target-configuration logic, as well as for code defined in library
/// modules such as CUDA's libdevice.
///
/// - ConstructAttributeList builds on top of getDefaultFunctionAttributes
/// and adds declaration-specific, convention-specific, and
/// frontend-specific logic. The last is of particular importance:
/// attributes that restrict how the frontend generates code must be
/// added here rather than getDefaultFunctionAttributes.
///
void CodeGenModule::ConstructAttributeList(StringRef Name,
const CGFunctionInfo &FI,
CGCalleeInfo CalleeInfo,
llvm::AttributeList &AttrList,
unsigned &CallingConv,
bool AttrOnCallSite, bool IsThunk) {
llvm::AttrBuilder FuncAttrs(getLLVMContext());
llvm::AttrBuilder RetAttrs(getLLVMContext());
// Collect function IR attributes from the CC lowering.
// We'll collect the parameter and result attributes later.
CallingConv = FI.getEffectiveCallingConvention();
if (FI.isNoReturn())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
if (FI.isCmseNSCall())
FuncAttrs.addAttribute("cmse_nonsecure_call");
// Collect function IR attributes from the callee prototype if we have one.
AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
CalleeInfo.getCalleeFunctionProtoType());
const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
// Attach assumption attributes to the declaration. If this is a call
// site, attach assumptions from the caller to the call as well.
AddAttributesFromOMPAssumes(FuncAttrs, TargetDecl);
bool HasOptnone = false;
// The NoBuiltinAttr attached to the target FunctionDecl.
const NoBuiltinAttr *NBA = nullptr;
// Some ABIs may result in additional accesses to arguments that may
// otherwise not be present.
auto AddPotentialArgAccess = [&]() {
llvm::Attribute A = FuncAttrs.getAttribute(llvm::Attribute::Memory);
if (A.isValid())
FuncAttrs.addMemoryAttr(A.getMemoryEffects() |
llvm::MemoryEffects::argMemOnly());
};
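// e.g. a 'pure' callee (memory(read)) that takes an sret or byval argument
// is relaxed to roughly memory(read, argmem: readwrite).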
// Collect function IR attributes based on declaration-specific
// information.
// FIXME: handle sseregparm someday...
if (TargetDecl) {
if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
FuncAttrs.addAttribute(llvm::Attribute::ReturnsTwice);
if (TargetDecl->hasAttr<NoThrowAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
if (TargetDecl->hasAttr<NoReturnAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
if (TargetDecl->hasAttr<ColdAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Cold);
if (TargetDecl->hasAttr<HotAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Hot);
if (TargetDecl->hasAttr<NoDuplicateAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (TargetDecl->hasAttr<ConvergentAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
AddAttributesFromFunctionProtoType(
getContext(), FuncAttrs, Fn->getType()->getAs<FunctionProtoType>());
if (AttrOnCallSite && Fn->isReplaceableGlobalAllocationFunction()) {
// A sane operator new returns a non-aliasing pointer.
auto Kind = Fn->getDeclName().getCXXOverloadedOperator();
if (getCodeGenOpts().AssumeSaneOperatorNew &&
(Kind == OO_New || Kind == OO_Array_New))
RetAttrs.addAttribute(llvm::Attribute::NoAlias);
}
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Fn);
const bool IsVirtualCall = MD && MD->isVirtual();
// Don't use [[noreturn]], _Noreturn or [[no_builtin]] for a call to a
// virtual function. These attributes are not inherited by overriders.
if (!(AttrOnCallSite && IsVirtualCall)) {
if (Fn->isNoReturn())
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
NBA = Fn->getAttr<NoBuiltinAttr>();
}
}
if (isa<FunctionDecl>(TargetDecl) || isa<VarDecl>(TargetDecl)) {
// Only place nomerge attribute on call sites, never functions. This
// allows it to work on indirect virtual function calls.
if (AttrOnCallSite && TargetDecl->hasAttr<NoMergeAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoMerge);
}
// 'const', 'pure' and 'noalias' attributed functions are also nounwind.
if (TargetDecl->hasAttr<ConstAttr>()) {
FuncAttrs.addMemoryAttr(llvm::MemoryEffects::none());
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
// gcc specifies that 'const' functions have greater restrictions than
// 'pure' functions, so they also cannot have infinite loops.
FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
} else if (TargetDecl->hasAttr<PureAttr>()) {
FuncAttrs.addMemoryAttr(llvm::MemoryEffects::readOnly());
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
// gcc specifies that 'pure' functions cannot have infinite loops.
FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
} else if (TargetDecl->hasAttr<NoAliasAttr>()) {
FuncAttrs.addMemoryAttr(llvm::MemoryEffects::inaccessibleOrArgMemOnly());
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
if (TargetDecl->hasAttr<RestrictAttr>())
RetAttrs.addAttribute(llvm::Attribute::NoAlias);
if (TargetDecl->hasAttr<ReturnsNonNullAttr>() &&
!CodeGenOpts.NullPointerIsValid)
RetAttrs.addAttribute(llvm::Attribute::NonNull);
if (TargetDecl->hasAttr<AnyX86NoCallerSavedRegistersAttr>())
FuncAttrs.addAttribute("no_caller_saved_registers");
if (TargetDecl->hasAttr<AnyX86NoCfCheckAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoCfCheck);
if (TargetDecl->hasAttr<LeafAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoCallback);
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
std::optional<unsigned> NumElemsParam;
if (AllocSize->getNumElemsParam().isValid())
NumElemsParam = AllocSize->getNumElemsParam().getLLVMIndex();
FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(),
NumElemsParam);
}
if (TargetDecl->hasAttr<OpenCLKernelAttr>()) {
if (getLangOpts().OpenCLVersion <= 120) {
// In OpenCL v1.2, work groups are always uniform.
FuncAttrs.addAttribute("uniform-work-group-size", "true");
} else {
// In OpenCL v2.0, work groups may or may not be uniform. The
// '-cl-uniform-work-group-size' compile option hints to the compiler that
// the global work-size is a multiple of the work-group size specified to
// clEnqueueNDRangeKernel (i.e. work groups are uniform).
FuncAttrs.addAttribute(
"uniform-work-group-size",
llvm::toStringRef(getLangOpts().OffloadUniformBlock));
}
}
if (TargetDecl->hasAttr<CUDAGlobalAttr>() &&
getLangOpts().OffloadUniformBlock)
FuncAttrs.addAttribute("uniform-work-group-size", "true");
if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>())
FuncAttrs.addAttribute("aarch64_pstate_sm_body");
}
// Attach "no-builtins" attributes to:
// * call sites: both `nobuiltin` and "no-builtins" or "no-builtin-<name>".
// * definitions: "no-builtins" or "no-builtin-<name>" only.
// The attributes can come from:
// * LangOpts: -ffreestanding, -fno-builtin, -fno-builtin-<name>
// * FunctionDecl attributes: __attribute__((no_builtin(...)))
addNoBuiltinAttributes(FuncAttrs, getLangOpts(), NBA);
// Collect function IR attributes based on global settings.
getDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite, FuncAttrs);
// Override some default IR attributes based on declaration-specific
// information.
if (TargetDecl) {
if (TargetDecl->hasAttr<NoSpeculativeLoadHardeningAttr>())
FuncAttrs.removeAttribute(llvm::Attribute::SpeculativeLoadHardening);
if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>())
FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
if (TargetDecl->hasAttr<NoSplitStackAttr>())
FuncAttrs.removeAttribute("split-stack");
if (TargetDecl->hasAttr<ZeroCallUsedRegsAttr>()) {
// A function "__attribute__((...))" overrides the command-line flag.
auto Kind =
TargetDecl->getAttr<ZeroCallUsedRegsAttr>()->getZeroCallUsedRegs();
FuncAttrs.removeAttribute("zero-call-used-regs");
FuncAttrs.addAttribute(
"zero-call-used-regs",
ZeroCallUsedRegsAttr::ConvertZeroCallUsedRegsKindToStr(Kind));
}
// Add NonLazyBind attribute to function declarations when -fno-plt
// is used.
// FIXME: what if we just haven't processed the function definition
// yet, or if it's an external definition like C99 inline?
if (CodeGenOpts.NoPLT) {
if (auto *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
if (!Fn->isDefined() && !AttrOnCallSite) {
FuncAttrs.addAttribute(llvm::Attribute::NonLazyBind);
}
}
}
}
// Add "sample-profile-suffix-elision-policy" attribute for internal linkage
// functions with -funique-internal-linkage-names.
if (TargetDecl && CodeGenOpts.UniqueInternalLinkageNames) {
if (const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
if (!FD->isExternallyVisible())
FuncAttrs.addAttribute("sample-profile-suffix-elision-policy",
"selected");
}
}
// Collect non-call-site function IR attributes from declaration-specific
// information.
if (!AttrOnCallSite) {
if (TargetDecl && TargetDecl->hasAttr<CmseNSEntryAttr>())
FuncAttrs.addAttribute("cmse_nonsecure_entry");
// Whether tail calls are enabled.
auto shouldDisableTailCalls = [&] {
// Should this be honored in getDefaultFunctionAttributes?
if (CodeGenOpts.DisableTailCalls)
return true;
if (!TargetDecl)
return false;
if (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
TargetDecl->hasAttr<AnyX86InterruptAttr>())
return true;
if (CodeGenOpts.NoEscapingBlockTailCalls) {
if (const auto *BD = dyn_cast<BlockDecl>(TargetDecl))
if (!BD->doesNotEscape())
return true;
}
return false;
};
if (shouldDisableTailCalls())
FuncAttrs.addAttribute("disable-tail-calls", "true");
// CPU/feature overrides. addDefaultFunctionDefinitionAttributes
// handles these separately to set them based on the global defaults.
GetCPUAndFeaturesAttributes(CalleeInfo.getCalleeDecl(), FuncAttrs);
}
// Collect attributes from arguments and return values.
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
QualType RetTy = FI.getReturnType();
const ABIArgInfo &RetAI = FI.getReturnInfo();
const llvm::DataLayout &DL = getDataLayout();
// Determine if the return type could be partially undef
if (CodeGenOpts.EnableNoundefAttrs &&
HasStrictReturn(*this, RetTy, TargetDecl)) {
if (!RetTy->isVoidType() && RetAI.getKind() != ABIArgInfo::Indirect &&
DetermineNoUndef(RetTy, getTypes(), DL, RetAI))
RetAttrs.addAttribute(llvm::Attribute::NoUndef);
}
switch (RetAI.getKind()) {
case ABIArgInfo::Extend:
if (RetAI.isSignExt())
RetAttrs.addAttribute(llvm::Attribute::SExt);
else
RetAttrs.addAttribute(llvm::Attribute::ZExt);
[[fallthrough]];
case ABIArgInfo::Direct:
if (RetAI.getInReg())
RetAttrs.addAttribute(llvm::Attribute::InReg);
if (canApplyNoFPClass(RetAI, RetTy, true))
RetAttrs.addNoFPClassAttr(getNoFPClassTestMask(getLangOpts()));
break;
case ABIArgInfo::Ignore:
break;
case ABIArgInfo::InAlloca:
case ABIArgInfo::Indirect: {
// inalloca and sret disable readnone and readonly
AddPotentialArgAccess();
break;
}
case ABIArgInfo::CoerceAndExpand:
break;
case ABIArgInfo::Expand:
case ABIArgInfo::IndirectAliased:
llvm_unreachable("Invalid ABI kind for return argument");
}
if (!IsThunk) {
// FIXME: fix this properly, https://reviews.llvm.org/D100388
if (const auto *RefTy = RetTy->getAs<ReferenceType>()) {
QualType PTy = RefTy->getPointeeType();
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
RetAttrs.addDereferenceableAttr(
getMinimumObjectSize(PTy).getQuantity());
if (getTypes().getTargetAddressSpace(PTy) == 0 &&
!CodeGenOpts.NullPointerIsValid)
RetAttrs.addAttribute(llvm::Attribute::NonNull);
if (PTy->isObjectType()) {
llvm::Align Alignment =
getNaturalPointeeTypeAlignment(RetTy).getAsAlign();
RetAttrs.addAlignmentAttr(Alignment);
}
}
}
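// e.g. a function returning 'int&' typically picks up nonnull,
// dereferenceable(4) and align 4 return attributes from the block above.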
bool hasUsedSRet = false;
SmallVector<llvm::AttributeSet, 4> ArgAttrs(IRFunctionArgs.totalIRArgs());
// Attach attributes to sret.
if (IRFunctionArgs.hasSRetArg()) {
llvm::AttrBuilder SRETAttrs(getLLVMContext());
SRETAttrs.addStructRetAttr(getTypes().ConvertTypeForMem(RetTy));
SRETAttrs.addAttribute(llvm::Attribute::Writable);
SRETAttrs.addAttribute(llvm::Attribute::DeadOnUnwind);
hasUsedSRet = true;
if (RetAI.getInReg())
SRETAttrs.addAttribute(llvm::Attribute::InReg);
SRETAttrs.addAlignmentAttr(RetAI.getIndirectAlign().getQuantity());
ArgAttrs[IRFunctionArgs.getSRetArgNo()] =
llvm::AttributeSet::get(getLLVMContext(), SRETAttrs);
}
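// e.g. an indirectly returned 'struct S' yields a leading IR parameter
// carrying roughly sret(%struct.S), writable, dead_on_unwind and the
// indirect alignment computed above.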
// Attach attributes to inalloca argument.
if (IRFunctionArgs.hasInallocaArg()) {
llvm::AttrBuilder Attrs(getLLVMContext());
Attrs.addInAllocaAttr(FI.getArgStruct());
ArgAttrs[IRFunctionArgs.getInallocaArgNo()] =
llvm::AttributeSet::get(getLLVMContext(), Attrs);
}
// Apply `nonnull`, `dereferenceable(N)` and `align N` to the `this` argument,
// unless this is a thunk function.
// FIXME: fix this properly, https://reviews.llvm.org/D100388
if (FI.isInstanceMethod() && !IRFunctionArgs.hasInallocaArg() &&
!FI.arg_begin()->type->isVoidPointerType() && !IsThunk) {
auto IRArgs = IRFunctionArgs.getIRArgs(0);
assert(IRArgs.second == 1 && "Expected only a single `this` pointer.");
llvm::AttrBuilder Attrs(getLLVMContext());
QualType ThisTy =
FI.arg_begin()->type.getTypePtr()->getPointeeType();
if (!CodeGenOpts.NullPointerIsValid &&
getTypes().getTargetAddressSpace(FI.arg_begin()->type) == 0) {
Attrs.addAttribute(llvm::Attribute::NonNull);
Attrs.addDereferenceableAttr(getMinimumObjectSize(ThisTy).getQuantity());
} else {
// FIXME dereferenceable should be correct here, regardless of
// NullPointerIsValid. However, dereferenceable currently does not always
// respect NullPointerIsValid and may imply nonnull and break the program.
// See https://reviews.llvm.org/D66618 for discussions.
Attrs.addDereferenceableOrNullAttr(
getMinimumObjectSize(
FI.arg_begin()->type.castAs<PointerType>()->getPointeeType())
.getQuantity());
}
llvm::Align Alignment =
getNaturalTypeAlignment(ThisTy, /*BaseInfo=*/nullptr,
/*TBAAInfo=*/nullptr, /*forPointeeType=*/true)
.getAsAlign();
Attrs.addAlignmentAttr(Alignment);
ArgAttrs[IRArgs.first] = llvm::AttributeSet::get(getLLVMContext(), Attrs);
}
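// e.g. for 'void C::m()' the 'this' parameter typically becomes nonnull,
// dereferenceable(sizeof(C)) and align(alignof(C)); with null pointers
// considered valid, dereferenceable_or_null is used instead.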
unsigned ArgNo = 0;
for (CGFunctionInfo::const_arg_iterator I = FI.arg_begin(),
E = FI.arg_end();
I != E; ++I, ++ArgNo) {
QualType ParamType = I->type;
const ABIArgInfo &AI = I->info;
llvm::AttrBuilder Attrs(getLLVMContext());
// Add attribute for padding argument, if necessary.
if (IRFunctionArgs.hasPaddingArg(ArgNo)) {
if (AI.getPaddingInReg()) {
ArgAttrs[IRFunctionArgs.getPaddingArgNo(ArgNo)] =
llvm::AttributeSet::get(
getLLVMContext(),
llvm::AttrBuilder(getLLVMContext()).addAttribute(llvm::Attribute::InReg));
}
}
// Decide whether the argument we're handling could be partially undef
if (CodeGenOpts.EnableNoundefAttrs &&
DetermineNoUndef(ParamType, getTypes(), DL, AI)) {
Attrs.addAttribute(llvm::Attribute::NoUndef);
}
// 'restrict' -> 'noalias' is done in EmitFunctionProlog when we
// have the corresponding parameter variable. It doesn't make
// sense to do it here because parameters are so messed up.
switch (AI.getKind()) {
case ABIArgInfo::Extend:
if (AI.isSignExt())
Attrs.addAttribute(llvm::Attribute::SExt);
else
Attrs.addAttribute(llvm::Attribute::ZExt);
[[fallthrough]];
case ABIArgInfo::Direct:
if (ArgNo == 0 && FI.isChainCall())
Attrs.addAttribute(llvm::Attribute::Nest);
else if (AI.getInReg())
Attrs.addAttribute(llvm::Attribute::InReg);
Attrs.addStackAlignmentAttr(llvm::MaybeAlign(AI.getDirectAlign()));
if (canApplyNoFPClass(AI, ParamType, false))
Attrs.addNoFPClassAttr(getNoFPClassTestMask(getLangOpts()));
break;
case ABIArgInfo::Indirect: {
if (AI.getInReg())
Attrs.addAttribute(llvm::Attribute::InReg);
if (AI.getIndirectByVal())
Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
auto *Decl = ParamType->getAsRecordDecl();
if (CodeGenOpts.PassByValueIsNoAlias && Decl &&
Decl->getArgPassingRestrictions() ==
RecordArgPassingKind::CanPassInRegs)
// When calling the function, the pointer passed in will be the only
// reference to the underlying object. Mark it accordingly.
Attrs.addAttribute(llvm::Attribute::NoAlias);
// TODO: We could add the byref attribute if not byval, but it would
// require updating many testcases.
CharUnits Align = AI.getIndirectAlign();
// In a byval argument, it is important that the required
// alignment of the type is honored, as LLVM might be creating a
// *new* stack object, and needs to know what alignment to give
// it. (Sometimes it can deduce a sensible alignment on its own,
// but not if clang decides it must emit a packed struct, or the
// user specifies increased alignment requirements.)
//
// This is different from indirect *not* byval, where the object
// exists already, and the align attribute is purely
// informative.
assert(!Align.isZero());
// For now, only add this when we have a byval argument.
// TODO: be less lazy about updating test cases.
if (AI.getIndirectByVal())
Attrs.addAlignmentAttr(Align.getQuantity());
// byval disables readnone and readonly.
AddPotentialArgAccess();
break;
}
case ABIArgInfo::IndirectAliased: {
CharUnits Align = AI.getIndirectAlign();
Attrs.addByRefAttr(getTypes().ConvertTypeForMem(ParamType));
Attrs.addAlignmentAttr(Align.getQuantity());
break;
}
case ABIArgInfo::Ignore:
case ABIArgInfo::Expand:
case ABIArgInfo::CoerceAndExpand:
break;
case ABIArgInfo::InAlloca:
// inalloca disables readnone and readonly.
AddPotentialArgAccess();
continue;
}
if (const auto *RefTy = ParamType->getAs<ReferenceType>()) {
QualType PTy = RefTy->getPointeeType();
if (!PTy->isIncompleteType() && PTy->isConstantSizeType())
Attrs.addDereferenceableAttr(
getMinimumObjectSize(PTy).getQuantity());
if (getTypes().getTargetAddressSpace(PTy) == 0 &&
!CodeGenOpts.NullPointerIsValid)
Attrs.addAttribute(llvm::Attribute::NonNull);
if (PTy->isObjectType()) {
llvm::Align Alignment =
getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
Attrs.addAlignmentAttr(Alignment);
}
}
// From OpenCL spec v3.0.10 section 6.3.5 Alignment of Types:
// > For arguments to a __kernel function declared to be a pointer to a
// > data type, the OpenCL compiler can assume that the pointee is always
// > appropriately aligned as required by the data type.
if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
ParamType->isPointerType()) {
QualType PTy = ParamType->getPointeeType();
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
llvm::Align Alignment =
getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
Attrs.addAlignmentAttr(Alignment);
}
}
switch (FI.getExtParameterInfo(ArgNo).getABI()) {
case ParameterABI::Ordinary:
break;
case ParameterABI::SwiftIndirectResult: {
// Add 'sret' if we haven't already used it for something, but
// only if the result is void.
if (!hasUsedSRet && RetTy->isVoidType()) {
Attrs.addStructRetAttr(getTypes().ConvertTypeForMem(ParamType));
hasUsedSRet = true;
}
// Add 'noalias' in either case.
Attrs.addAttribute(llvm::Attribute::NoAlias);
// Add 'dereferenceable' and 'alignment'.
auto PTy = ParamType->getPointeeType();
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
auto info = getContext().getTypeInfoInChars(PTy);
Attrs.addDereferenceableAttr(info.Width.getQuantity());
Attrs.addAlignmentAttr(info.Align.getAsAlign());
}
break;
}
case ParameterABI::SwiftErrorResult:
Attrs.addAttribute(llvm::Attribute::SwiftError);
break;
case ParameterABI::SwiftContext:
Attrs.addAttribute(llvm::Attribute::SwiftSelf);
break;
case ParameterABI::SwiftAsyncContext:
Attrs.addAttribute(llvm::Attribute::SwiftAsync);
break;
}
if (FI.getExtParameterInfo(ArgNo).isNoEscape())
Attrs.addAttribute(llvm::Attribute::NoCapture);
if (Attrs.hasAttributes()) {
unsigned FirstIRArg, NumIRArgs;
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
for (unsigned i = 0; i < NumIRArgs; i++)
ArgAttrs[FirstIRArg + i] = ArgAttrs[FirstIRArg + i].addAttributes(
getLLVMContext(), llvm::AttributeSet::get(getLLVMContext(), Attrs));
}
}
assert(ArgNo == FI.arg_size());
AttrList = llvm::AttributeList::get(
getLLVMContext(), llvm::AttributeSet::get(getLLVMContext(), FuncAttrs),
llvm::AttributeSet::get(getLLVMContext(), RetAttrs), ArgAttrs);
}
/// An argument came in as a promoted argument; demote it back to its
/// declared type.
static llvm::Value *emitArgumentDemotion(CodeGenFunction &CGF,
const VarDecl *var,
llvm::Value *value) {
llvm::Type *varType = CGF.ConvertType(var->getType());
// This can happen with promotions that actually don't change the
// underlying type, like the enum promotions.
if (value->getType() == varType) return value;
assert((varType->isIntegerTy() || varType->isFloatingPointTy())
&& "unexpected promotion type");
if (isa<llvm::IntegerType>(varType))
return CGF.Builder.CreateTrunc(value, varType, "arg.unpromote");
return CGF.Builder.CreateFPCast(value, varType, "arg.unpromote");
}
/// Returns the attribute (either parameter attribute, or function
/// attribute), which declares argument ArgNo to be non-null.
static const NonNullAttr *getNonNullAttr(const Decl *FD, const ParmVarDecl *PVD,
QualType ArgType, unsigned ArgNo) {
// FIXME: __attribute__((nonnull)) can also be applied to:
// - references to pointers, where the pointee is known to be
// nonnull (apparently a Clang extension)
// - transparent unions containing pointers
// In the former case, LLVM IR cannot represent the constraint. In
// the latter case, we have no guarantee that the transparent union
// is in fact passed as a pointer.
if (!ArgType->isAnyPointerType() && !ArgType->isBlockPointerType())
return nullptr;
// First, check attribute on parameter itself.
if (PVD) {
if (auto ParmNNAttr = PVD->getAttr<NonNullAttr>())
return ParmNNAttr;
}
// Check function attributes.
if (!FD)
return nullptr;
for (const auto *NNAttr : FD->specific_attrs<NonNullAttr>()) {
if (NNAttr->isNonNull(ArgNo))
return NNAttr;
}
return nullptr;
}
namespace {
struct CopyBackSwiftError final : EHScopeStack::Cleanup {
Address Temp;
Address Arg;
CopyBackSwiftError(Address temp, Address arg) : Temp(temp), Arg(arg) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::Value *errorValue = CGF.Builder.CreateLoad(Temp);
CGF.Builder.CreateStore(errorValue, Arg);
}
};
}
void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::Function *Fn,
const FunctionArgList &Args) {
if (CurCodeDecl && CurCodeDecl->hasAttr<NakedAttr>())
// Naked functions don't have prologues.
return;
// If this is an implicit-return-zero function, go ahead and
// initialize the return value. TODO: it might be nice to have
// a more general mechanism for this that didn't require synthesized
// return statements.
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl)) {
if (FD->hasImplicitReturnZero()) {
QualType RetTy = FD->getReturnType().getUnqualifiedType();
llvm::Type* LLVMTy = CGM.getTypes().ConvertType(RetTy);
llvm::Constant* Zero = llvm::Constant::getNullValue(LLVMTy);
Builder.CreateStore(Zero, ReturnValue);
}
}
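// e.g. 'main' takes this path: it has an implicit 'return 0', so its return
// slot is zero-initialized up front.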
// FIXME: We no longer need the types from FunctionArgList; lift up and
// simplify.
ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), FI);
assert(Fn->arg_size() == IRFunctionArgs.totalIRArgs());
// If we're using inalloca, all the memory arguments are GEPs off of the last
// parameter, which is a pointer to the complete memory area.
Address ArgStruct = Address::invalid();
if (IRFunctionArgs.hasInallocaArg())
ArgStruct = Address(Fn->getArg(IRFunctionArgs.getInallocaArgNo()),
FI.getArgStruct(), FI.getArgStructAlignment());
// Name the struct return parameter.
if (IRFunctionArgs.hasSRetArg()) {
auto AI = Fn->getArg(IRFunctionArgs.getSRetArgNo());
AI->setName("agg.result");
AI->addAttr(llvm::Attribute::NoAlias);
}
// Track if we received the parameter as a pointer (indirect, byval, or
// inalloca). If we already have a pointer, EmitParmDecl doesn't need to copy it
// into a local alloca for us.
SmallVector<ParamValue, 16> ArgVals;
ArgVals.reserve(Args.size());
// Create a pointer value for every parameter declaration. This usually
// entails copying one or more LLVM IR arguments into an alloca. Don't push
// any cleanups or do anything that might unwind. We do that separately, so
// we can push the cleanups in the correct order for the ABI.
assert(FI.arg_size() == Args.size() &&
"Mismatch between function signature & arguments.");
unsigned ArgNo = 0;
CGFunctionInfo::const_arg_iterator info_it = FI.arg_begin();
for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end();
i != e; ++i, ++info_it, ++ArgNo) {
const VarDecl *Arg = *i;
const ABIArgInfo &ArgI = info_it->info;
bool isPromoted =
isa<ParmVarDecl>(Arg) && cast<ParmVarDecl>(Arg)->isKNRPromoted();
// We are converting from ABIArgInfo type to VarDecl type directly, unless
// the parameter is promoted. In this case we convert to
// CGFunctionInfo::ArgInfo type with subsequent argument demotion.
QualType Ty = isPromoted ? info_it->type : Arg->getType();
assert(hasScalarEvaluationKind(Ty) ==
hasScalarEvaluationKind(Arg->getType()));
unsigned FirstIRArg, NumIRArgs;
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
switch (ArgI.getKind()) {
case ABIArgInfo::InAlloca: {
assert(NumIRArgs == 0);
auto FieldIndex = ArgI.getInAllocaFieldIndex();
Address V =
Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName());
if (ArgI.getInAllocaIndirect())
V = Address(Builder.CreateLoad(V), ConvertTypeForMem(Ty),
getContext().getTypeAlignInChars(Ty));
ArgVals.push_back(ParamValue::forIndirect(V));
break;
}
case ABIArgInfo::Indirect:
case ABIArgInfo::IndirectAliased: {
assert(NumIRArgs == 1);
Address ParamAddr = makeNaturalAddressForPointer(
Fn->getArg(FirstIRArg), Ty, ArgI.getIndirectAlign(), false, nullptr,
nullptr, KnownNonNull);
if (!hasScalarEvaluationKind(Ty)) {
// Aggregates and complex variables are accessed by reference. All we
// need to do is realign the value, if requested. Also, if the address
// may be aliased, copy it to ensure that the parameter variable is
// mutable and has a unique address, as C requires.
if (ArgI.getIndirectRealign() || ArgI.isIndirectAliased()) {
RawAddress AlignedTemp = CreateMemTemp(Ty, "coerce");
// Copy from the incoming argument pointer to the temporary with the
// appropriate alignment.
//
// FIXME: We should have a common utility for generating an aggregate
// copy.
CharUnits Size = getContext().getTypeSizeInChars(Ty);
Builder.CreateMemCpy(
AlignedTemp.getPointer(), AlignedTemp.getAlignment().getAsAlign(),
ParamAddr.emitRawPointer(*this),
ParamAddr.getAlignment().getAsAlign(),
llvm::ConstantInt::get(IntPtrTy, Size.getQuantity()));
ParamAddr = AlignedTemp;
}
ArgVals.push_back(ParamValue::forIndirect(ParamAddr));
} else {
// Load scalar value from indirect argument.
llvm::Value *V =
EmitLoadOfScalar(ParamAddr, false, Ty, Arg->getBeginLoc());
if (isPromoted)
V = emitArgumentDemotion(*this, Arg, V);
ArgVals.push_back(ParamValue::forDirect(V));
}
break;
}
case ABIArgInfo::Extend:
case ABIArgInfo::Direct: {
auto AI = Fn->getArg(FirstIRArg);
llvm::Type *LTy = ConvertType(Arg->getType());
// Prepare parameter attributes. So far, only attributes for pointer
// parameters are prepared. See
// http://llvm.org/docs/LangRef.html#paramattrs.
if (ArgI.getDirectOffset() == 0 && LTy->isPointerTy() &&
ArgI.getCoerceToType()->isPointerTy()) {
assert(NumIRArgs == 1);
if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) {
// Set `nonnull` attribute if any.
if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(),
PVD->getFunctionScopeIndex()) &&
!CGM.getCodeGenOpts().NullPointerIsValid)
AI->addAttr(llvm::Attribute::NonNull);
QualType OTy = PVD->getOriginalType();
if (const auto *ArrTy =
getContext().getAsConstantArrayType(OTy)) {
// A C99 array parameter declaration with the static keyword also
// indicates dereferenceability, and if the size is constant we can
// use the dereferenceable attribute (which requires the size in
// bytes).
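// For example, 'void f(int a[static 4])' gives 'a' align 4 and
// dereferenceable(16) on a target with 4-byte int.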
if (ArrTy->getSizeModifier() == ArraySizeModifier::Static) {
QualType ETy = ArrTy->getElementType();
llvm::Align Alignment =
CGM.getNaturalTypeAlignment(ETy).getAsAlign();
AI->addAttrs(llvm::AttrBuilder(getLLVMContext()).addAlignmentAttr(Alignment));
uint64_t ArrSize = ArrTy->getZExtSize();
if (!ETy->isIncompleteType() && ETy->isConstantSizeType() &&
ArrSize) {
llvm::AttrBuilder Attrs(getLLVMContext());
Attrs.addDereferenceableAttr(
getContext().getTypeSizeInChars(ETy).getQuantity() *
ArrSize);
AI->addAttrs(Attrs);
} else if (getContext().getTargetInfo().getNullPointerValue(
ETy.getAddressSpace()) == 0 &&
!CGM.getCodeGenOpts().NullPointerIsValid) {
AI->addAttr(llvm::Attribute::NonNull);
}
}
} else if (const auto *ArrTy =
getContext().getAsVariableArrayType(OTy)) {
// For C99 VLAs with the static keyword, we don't know the size so
// we can't use the dereferenceable attribute, but in addrspace(0)
// we know that it must be nonnull.
if (ArrTy->getSizeModifier() == ArraySizeModifier::Static) {
QualType ETy = ArrTy->getElementType();
llvm::Align Alignment =
CGM.getNaturalTypeAlignment(ETy).getAsAlign();
AI->addAttrs(llvm::AttrBuilder(getLLVMContext()).addAlignmentAttr(Alignment));
if (!getTypes().getTargetAddressSpace(ETy) &&
!CGM.getCodeGenOpts().NullPointerIsValid)
AI->addAttr(llvm::Attribute::NonNull);
}
}
// Set `align` attribute if any.
const auto *AVAttr = PVD->getAttr<AlignValueAttr>();
if (!AVAttr)
if (const auto *TOTy = OTy->getAs<TypedefType>())
AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>();
if (AVAttr && !SanOpts.has(SanitizerKind::Alignment)) {
// If the alignment-assumption sanitizer is enabled, we do *not* add the
// alignment attribute here; instead we emit a normal alignment assumption
// so that the UBSan check can function.
llvm::ConstantInt *AlignmentCI =
cast<llvm::ConstantInt>(EmitScalarExpr(AVAttr->getAlignment()));
uint64_t AlignmentInt =
AlignmentCI->getLimitedValue(llvm::Value::MaximumAlignment);
if (AI->getParamAlign().valueOrOne() < AlignmentInt) {
AI->removeAttr(llvm::Attribute::AttrKind::Alignment);
AI->addAttrs(llvm::AttrBuilder(getLLVMContext()).addAlignmentAttr(
llvm::Align(AlignmentInt)));
}
}
}
// Set 'noalias' if an argument type has the `restrict` qualifier.
if (Arg->getType().isRestrictQualified())
AI->addAttr(llvm::Attribute::NoAlias);
}
// Prepare the argument value. If we have the trivial case, handle it
// with no muss and fuss.
if (!isa<llvm::StructType>(ArgI.getCoerceToType()) &&
ArgI.getCoerceToType() == ConvertType(Ty) &&
ArgI.getDirectOffset() == 0) {
assert(NumIRArgs == 1);
// LLVM expects swifterror parameters to be used in very restricted
// ways. Copy the value into a less-restricted temporary.
llvm::Value *V = AI;
if (FI.getExtParameterInfo(ArgNo).getABI()
== ParameterABI::SwiftErrorResult) {
QualType pointeeTy = Ty->getPointeeType();
assert(pointeeTy->isPointerType());
RawAddress temp =
CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp");
Address arg = makeNaturalAddressForPointer(
V, pointeeTy, getContext().getTypeAlignInChars(pointeeTy));
llvm::Value *incomingErrorValue = Builder.CreateLoad(arg);
Builder.CreateStore(incomingErrorValue, temp);
V = temp.getPointer();
// Push a cleanup to copy the value back at the end of the function.
// The convention does not guarantee that the value will be written
// back if the function exits with an unwind exception.
EHStack.pushCleanup<CopyBackSwiftError>(NormalCleanup, temp, arg);
}
// Ensure the argument is the correct type.
if (V->getType() != ArgI.getCoerceToType())
V = Builder.CreateBitCast(V, ArgI.getCoerceToType());
if (isPromoted)
V = emitArgumentDemotion(*this, Arg, V);
// Because of merging of function types from multiple decls it is
// possible for the type of an argument to not match the corresponding
// type in the function type. Since we are codegening the callee
// in here, add a cast to the argument type.
llvm::Type *LTy = ConvertType(Arg->getType());
if (V->getType() != LTy)
V = Builder.CreateBitCast(V, LTy);
ArgVals.push_back(ParamValue::forDirect(V));
break;
}
// VLST arguments are coerced to VLATs at the function boundary for
// ABI consistency. If this is a VLST that was coerced to
// a VLAT at the function boundary and the types match up, use
// llvm.vector.extract to convert back to the original VLST.
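// (VLST: vector-length-specific type, e.g. a fixed-length SVE vector under
// -msve-vector-bits=N; VLAT: the corresponding vector-length-agnostic
// scalable type.)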
if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(ConvertType(Ty))) {
llvm::Value *Coerced = Fn->getArg(FirstIRArg);
if (auto *VecTyFrom =
dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
// If we are casting a scalable i1 predicate vector to a fixed i8
// vector, bitcast the source and use a vector extract.
if (VecTyFrom->getElementType()->isIntegerTy(1) &&
VecTyFrom->getElementCount().isKnownMultipleOf(8) &&
VecTyTo->getElementType() == Builder.getInt8Ty()) {
VecTyFrom = llvm::ScalableVectorType::get(
VecTyTo->getElementType(),
VecTyFrom->getElementCount().getKnownMinValue() / 8);
Coerced = Builder.CreateBitCast(Coerced, VecTyFrom);
}
if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
assert(NumIRArgs == 1);
Coerced->setName(Arg->getName() + ".coerce");
ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector(
VecTyTo, Coerced, Zero, "cast.fixed")));
break;
}
}
}
llvm::StructType *STy =
dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
if (ArgI.isDirect() && !ArgI.getCanBeFlattened() && STy &&
STy->getNumElements() > 1) {
[[maybe_unused]] llvm::TypeSize StructSize =
CGM.getDataLayout().getTypeAllocSize(STy);
[[maybe_unused]] llvm::TypeSize PtrElementSize =
CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(Ty));
if (STy->containsHomogeneousScalableVectorTypes()) {
assert(StructSize == PtrElementSize &&
"Only allow non-fractional movement of structure with"
"homogeneous scalable vector type");
ArgVals.push_back(ParamValue::forDirect(AI));
break;
}
}
Address Alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg),
Arg->getName());
// Pointer to store into.
Address Ptr = emitAddressAtOffset(*this, Alloca, ArgI);
// Fast-isel and the optimizer generally like scalar values better than
// FCAs, so we flatten them if this is safe to do for this argument.
if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
STy->getNumElements() > 1) {
llvm::TypeSize StructSize = CGM.getDataLayout().getTypeAllocSize(STy);
llvm::TypeSize PtrElementSize =
CGM.getDataLayout().getTypeAllocSize(Ptr.getElementType());
if (StructSize.isScalable()) {
assert(STy->containsHomogeneousScalableVectorTypes() &&
"ABI only supports structure with homogeneous scalable vector "
"type");
assert(StructSize == PtrElementSize &&
"Only allow non-fractional movement of structure with"
"homogeneous scalable vector type");
assert(STy->getNumElements() == NumIRArgs);
llvm::Value *LoadedStructValue = llvm::PoisonValue::get(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
auto *AI = Fn->getArg(FirstIRArg + i);
AI->setName(Arg->getName() + ".coerce" + Twine(i));
LoadedStructValue =
Builder.CreateInsertValue(LoadedStructValue, AI, i);
}
Builder.CreateStore(LoadedStructValue, Ptr);
} else {
uint64_t SrcSize = StructSize.getFixedValue();
uint64_t DstSize = PtrElementSize.getFixedValue();
Address AddrToStoreInto = Address::invalid();
if (SrcSize <= DstSize) {
AddrToStoreInto = Ptr.withElementType(STy);
} else {
AddrToStoreInto =
CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
}
assert(STy->getNumElements() == NumIRArgs);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
auto AI = Fn->getArg(FirstIRArg + i);
AI->setName(Arg->getName() + ".coerce" + Twine(i));
Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
Builder.CreateStore(AI, EltPtr);
}
if (SrcSize > DstSize) {
Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
}
}
} else {
// Simple case, just do a coerced store of the argument into the alloca.
assert(NumIRArgs == 1);
auto AI = Fn->getArg(FirstIRArg);
AI->setName(Arg->getName() + ".coerce");
- CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
+ CreateCoercedStore(
+ AI, Ptr,
+ llvm::TypeSize::getFixed(
+ getContext().getTypeSizeInChars(Ty).getQuantity() -
+ ArgI.getDirectOffset()),
+ /*DstIsVolatile=*/false);
}
// Match to what EmitParmDecl is expecting for this type.
if (CodeGenFunction::hasScalarEvaluationKind(Ty)) {
llvm::Value *V =
EmitLoadOfScalar(Alloca, false, Ty, Arg->getBeginLoc());
if (isPromoted)
V = emitArgumentDemotion(*this, Arg, V);
ArgVals.push_back(ParamValue::forDirect(V));
} else {
ArgVals.push_back(ParamValue::forIndirect(Alloca));
}
break;
}
case ABIArgInfo::CoerceAndExpand: {
// Reconstruct into a temporary.
Address alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg));
ArgVals.push_back(ParamValue::forIndirect(alloca));
auto coercionType = ArgI.getCoerceAndExpandType();
alloca = alloca.withElementType(coercionType);
unsigned argIndex = FirstIRArg;
for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
llvm::Type *eltType = coercionType->getElementType(i);
if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType))
continue;
auto eltAddr = Builder.CreateStructGEP(alloca, i);
auto elt = Fn->getArg(argIndex++);
Builder.CreateStore(elt, eltAddr);
}
assert(argIndex == FirstIRArg + NumIRArgs);
break;
}
case ABIArgInfo::Expand: {
// If this structure was expanded into multiple arguments then
// we need to create a temporary and reconstruct it from the
// arguments.
Address Alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg));
LValue LV = MakeAddrLValue(Alloca, Ty);
ArgVals.push_back(ParamValue::forIndirect(Alloca));
auto FnArgIter = Fn->arg_begin() + FirstIRArg;
ExpandTypeFromArgs(Ty, LV, FnArgIter);
assert(FnArgIter == Fn->arg_begin() + FirstIRArg + NumIRArgs);
for (unsigned i = 0, e = NumIRArgs; i != e; ++i) {
auto AI = Fn->getArg(FirstIRArg + i);
AI->setName(Arg->getName() + "." + Twine(i));
}
break;
}
case ABIArgInfo::Ignore:
assert(NumIRArgs == 0);
// Initialize the local variable appropriately.
if (!hasScalarEvaluationKind(Ty)) {
ArgVals.push_back(ParamValue::forIndirect(CreateMemTemp(Ty)));
} else {
llvm::Value *U = llvm::UndefValue::get(ConvertType(Arg->getType()));
ArgVals.push_back(ParamValue::forDirect(U));
}
break;
}
}
if (getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
for (int I = Args.size() - 1; I >= 0; --I)
EmitParmDecl(*Args[I], ArgVals[I], I + 1);
} else {
for (unsigned I = 0, E = Args.size(); I != E; ++I)
EmitParmDecl(*Args[I], ArgVals[I], I + 1);
}
}
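/// Erase a chain of unused bitcasts, walking from the given instruction up
/// through its operands as each cast becomes dead.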
static void eraseUnusedBitCasts(llvm::Instruction *insn) {
while (insn->use_empty()) {
llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(insn);
if (!bitcast) return;
// This is "safe" because we would have used a ConstantExpr otherwise.
insn = cast<llvm::Instruction>(bitcast->getOperand(0));
bitcast->eraseFromParent();
}
}
/// Try to emit a fused autorelease of a return result.
static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF,
llvm::Value *result) {
// The insertion point must immediately follow the instruction that produced
// the result.
llvm::BasicBlock *BB = CGF.Builder.GetInsertBlock();
if (BB->empty()) return nullptr;
if (&BB->back() != result) return nullptr;
llvm::Type *resultType = result->getType();
// result is in a BasicBlock and is therefore an Instruction.
llvm::Instruction *generator = cast<llvm::Instruction>(result);
SmallVector<llvm::Instruction *, 4> InstsToKill;
// Look for:
// %generator = bitcast %type1* %generator2 to %type2*
while (llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(generator)) {
// We would have emitted this as a constant if the operand weren't
// an Instruction.
generator = cast<llvm::Instruction>(bitcast->getOperand(0));
// Require the generator to be immediately followed by the cast.
if (generator->getNextNode() != bitcast)
return nullptr;
InstsToKill.push_back(bitcast);
}
// Look for:
// %generator = call i8* @objc_retain(i8* %originalResult)
// or
// %generator = call i8* @objc_retainAutoreleasedReturnValue(i8* %originalResult)
llvm::CallInst *call = dyn_cast<llvm::CallInst>(generator);
if (!call) return nullptr;
bool doRetainAutorelease;
if (call->getCalledOperand() == CGF.CGM.getObjCEntrypoints().objc_retain) {
doRetainAutorelease = true;
} else if (call->getCalledOperand() ==
CGF.CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue) {
doRetainAutorelease = false;
// If we emitted an assembly marker for this call (and the
// ARCEntrypoints field should have been set if so), go looking
// for that call. If we can't find it, we can't do this
// optimization. But it should always be the immediately previous
// instruction, unless we needed bitcasts around the call.
if (CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker) {
llvm::Instruction *prev = call->getPrevNode();
assert(prev);
if (isa<llvm::BitCastInst>(prev)) {
prev = prev->getPrevNode();
assert(prev);
}
assert(isa<llvm::CallInst>(prev));
assert(cast<llvm::CallInst>(prev)->getCalledOperand() ==
CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker);
InstsToKill.push_back(prev);
}
} else {
return nullptr;
}
result = call->getArgOperand(0);
InstsToKill.push_back(call);
// Keep killing bitcasts, for sanity. Note that we no longer care
// about precise ordering as long as there's exactly one use.
while (llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(result)) {
if (!bitcast->hasOneUse()) break;
InstsToKill.push_back(bitcast);
result = bitcast->getOperand(0);
}
// Delete all the unnecessary instructions, from latest to earliest.
for (auto *I : InstsToKill)
I->eraseFromParent();
// Do the fused retain/autorelease if we were asked to.
if (doRetainAutorelease)
result = CGF.EmitARCRetainAutoreleaseReturnValue(result);
// Cast back to the result type.
return CGF.Builder.CreateBitCast(result, resultType);
}
/// If this is a +1 of the value of an immutable 'self', remove it.
static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF,
llvm::Value *result) {
// This is only applicable to a method with an immutable 'self'.
const ObjCMethodDecl *method =
dyn_cast_or_null<ObjCMethodDecl>(CGF.CurCodeDecl);
if (!method) return nullptr;
const VarDecl *self = method->getSelfDecl();
if (!self->getType().isConstQualified()) return nullptr;
// Look for a retain call. Note: stripPointerCasts looks through returned arg
// functions, which would cause us to miss the retain.
llvm::CallInst *retainCall = dyn_cast<llvm::CallInst>(result);
if (!retainCall || retainCall->getCalledOperand() !=
CGF.CGM.getObjCEntrypoints().objc_retain)
return nullptr;
// Look for an ordinary load of 'self'.
llvm::Value *retainedValue = retainCall->getArgOperand(0);
llvm::LoadInst *load =
dyn_cast<llvm::LoadInst>(retainedValue->stripPointerCasts());
if (!load || load->isAtomic() || load->isVolatile() ||
load->getPointerOperand() != CGF.GetAddrOfLocalVar(self).getBasePointer())
return nullptr;
// Okay! Burn it all down. This relies for correctness on the
// assumption that the retain is emitted as part of the return and
// that thereafter everything is used "linearly".
llvm::Type *resultType = result->getType();
eraseUnusedBitCasts(cast<llvm::Instruction>(result));
assert(retainCall->use_empty());
retainCall->eraseFromParent();
eraseUnusedBitCasts(cast<llvm::Instruction>(retainedValue));
return CGF.Builder.CreateBitCast(load, resultType);
}
/// Emit an ARC autorelease of the result of a function.
///
/// \return the value to actually return from the function
static llvm::Value *emitAutoreleaseOfResult(CodeGenFunction &CGF,
llvm::Value *result) {
// If we're returning 'self', kill the initial retain. This is a
// heuristic attempt to "encourage correctness" in the really unfortunate
// case where we have a return of self during a dealloc and we desperately
// need to avoid the possible autorelease.
if (llvm::Value *self = tryRemoveRetainOfSelf(CGF, result))
return self;
// At -O0, try to emit a fused retain/autorelease.
if (CGF.shouldUseFusedARCCalls())
if (llvm::Value *fused = tryEmitFusedAutoreleaseOfResult(CGF, result))
return fused;
return CGF.EmitARCAutoreleaseReturnValue(result);
}
/// Heuristically search for a dominating store to the return-value slot.
static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
llvm::Value *ReturnValuePtr = CGF.ReturnValue.getBasePointer();
// Check whether a User is a store whose pointer operand is the ReturnValue.
// We are looking for stores to the ReturnValue, not for stores of the
// ReturnValue to some other location.
auto GetStoreIfValid = [&CGF,
ReturnValuePtr](llvm::User *U) -> llvm::StoreInst * {
auto *SI = dyn_cast<llvm::StoreInst>(U);
if (!SI || SI->getPointerOperand() != ReturnValuePtr ||
SI->getValueOperand()->getType() != CGF.ReturnValue.getElementType())
return nullptr;
// These aren't actually possible for non-coerced returns, and we
// only care about non-coerced returns on this code path.
// All memory instructions inside __try block are volatile.
assert(!SI->isAtomic() &&
(!SI->isVolatile() || CGF.currentFunctionUsesSEHTry()));
return SI;
};
// If there are multiple uses of the return-value slot, just check
// for something immediately preceding the IP. Sometimes this can
// happen with how we generate implicit-returns; it can also happen
// with noreturn cleanups.
if (!ReturnValuePtr->hasOneUse()) {
llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
if (IP->empty()) return nullptr;
// Look at directly preceding instruction, skipping bitcasts and lifetime
// markers.
for (llvm::Instruction &I : make_range(IP->rbegin(), IP->rend())) {
if (isa<llvm::BitCastInst>(&I))
continue;
if (auto *II = dyn_cast<llvm::IntrinsicInst>(&I))
if (II->getIntrinsicID() == llvm::Intrinsic::lifetime_end)
continue;
return GetStoreIfValid(&I);
}
return nullptr;
}
llvm::StoreInst *store = GetStoreIfValid(ReturnValuePtr->user_back());
if (!store) return nullptr;
// Now do a quick-and-dirty dominance check: just walk up the
// single-predecessor chain from the current insertion point.
llvm::BasicBlock *StoreBB = store->getParent();
llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
llvm::SmallPtrSet<llvm::BasicBlock *, 4> SeenBBs;
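// Bail out if we revisit a block (a cycle) or reach a block without a
// unique predecessor.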
while (IP != StoreBB) {
if (!SeenBBs.insert(IP).second || !(IP = IP->getSinglePredecessor()))
return nullptr;
}
// Okay, the store's basic block dominates the insertion point; we
// can do our thing.
return store;
}
// Helper functions for EmitCMSEClearRecord
// Set the bits corresponding to a field having width `BitWidth` and located at
// offset `BitOffset` (from the least significant bit) within a storage unit of
// `Bits.size()` bytes. Each element of `Bits` corresponds to one target byte.
// Use little-endian layout, i.e. `Bits[0]` is the LSB.
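// For example, with CharWidth == 8, BitOffset == 3 and BitWidth == 7 over a
// two-byte storage unit, this sets Bits[0] |= 0xF8 and Bits[1] |= 0x03.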
static void setBitRange(SmallVectorImpl<uint64_t> &Bits, int BitOffset,
int BitWidth, int CharWidth) {
assert(CharWidth <= 64);
assert(static_cast<unsigned>(BitWidth) <= Bits.size() * CharWidth);
int Pos = 0;
if (BitOffset >= CharWidth) {
Pos += BitOffset / CharWidth;
BitOffset = BitOffset % CharWidth;
}
const uint64_t Used = (uint64_t(1) << CharWidth) - 1;
if (BitOffset + BitWidth >= CharWidth) {
Bits[Pos++] |= (Used << BitOffset) & Used;
BitWidth -= CharWidth - BitOffset;
BitOffset = 0;
}
while (BitWidth >= CharWidth) {
Bits[Pos++] = Used;
BitWidth -= CharWidth;
}
if (BitWidth > 0)
Bits[Pos++] |= (Used >> (CharWidth - BitWidth)) << BitOffset;
}
// Set the bits corresponding to a field having width `BitWidth` and located at
// offset `BitOffset` (from the least significant bit) within a storage unit of
// `StorageSize` bytes, located at `StorageOffset` in `Bits`. Each element of
// `Bits` corresponds to one target byte. Use target endian layout.
static void setBitRange(SmallVectorImpl<uint64_t> &Bits, int StorageOffset,
int StorageSize, int BitOffset, int BitWidth,
int CharWidth, bool BigEndian) {
SmallVector<uint64_t, 8> TmpBits(StorageSize);
setBitRange(TmpBits, BitOffset, BitWidth, CharWidth);
if (BigEndian)
std::reverse(TmpBits.begin(), TmpBits.end());
for (uint64_t V : TmpBits)
Bits[StorageOffset++] |= V;
}
static void setUsedBits(CodeGenModule &, QualType, int,
SmallVectorImpl<uint64_t> &);
// Set the bits in `Bits`, which correspond to the value representations of
// the actual members of the record type `RTy`. Note that this function does
// not handle base classes, virtual tables, etc., since they cannot appear in
// CMSE function arguments or return values. The bit mask corresponds to the
// target memory layout, i.e. it is endian-dependent.
static void setUsedBits(CodeGenModule &CGM, const RecordType *RTy, int Offset,
SmallVectorImpl<uint64_t> &Bits) {
ASTContext &Context = CGM.getContext();
int CharWidth = Context.getCharWidth();
const RecordDecl *RD = RTy->getDecl()->getDefinition();
const ASTRecordLayout &ASTLayout = Context.getASTRecordLayout(RD);
const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(RD);
int Idx = 0;
for (auto I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++Idx) {
const FieldDecl *F = *I;
if (F->isUnnamedBitField() || F->isZeroLengthBitField(Context) ||
F->getType()->isIncompleteArrayType())
continue;
if (F->isBitField()) {
const CGBitFieldInfo &BFI = Layout.getBitFieldInfo(F);
setBitRange(Bits, Offset + BFI.StorageOffset.getQuantity(),
BFI.StorageSize / CharWidth, BFI.Offset,
BFI.Size, CharWidth,
CGM.getDataLayout().isBigEndian());
continue;
}
setUsedBits(CGM, F->getType(),
Offset + ASTLayout.getFieldOffset(Idx) / CharWidth, Bits);
}
}
// Set the bits in `Bits`, which correspond to the value representations of
// the elements of an array type `ATy`.
static void setUsedBits(CodeGenModule &CGM, const ConstantArrayType *ATy,
int Offset, SmallVectorImpl<uint64_t> &Bits) {
const ASTContext &Context = CGM.getContext();
QualType ETy = Context.getBaseElementType(ATy);
int Size = Context.getTypeSizeInChars(ETy).getQuantity();
SmallVector<uint64_t, 4> TmpBits(Size);
setUsedBits(CGM, ETy, 0, TmpBits);
for (int I = 0, N = Context.getConstantArrayElementCount(ATy); I < N; ++I) {
auto Src = TmpBits.begin();
auto Dst = Bits.begin() + Offset + I * Size;
for (int J = 0; J < Size; ++J)
*Dst++ |= *Src++;
}
}
// Set the bits in `Bits`, which correspond to the value representations of
// the type `QTy`.
static void setUsedBits(CodeGenModule &CGM, QualType QTy, int Offset,
SmallVectorImpl<uint64_t> &Bits) {
if (const auto *RTy = QTy->getAs<RecordType>())
return setUsedBits(CGM, RTy, Offset, Bits);
ASTContext &Context = CGM.getContext();
if (const auto *ATy = Context.getAsConstantArrayType(QTy))
return setUsedBits(CGM, ATy, Offset, Bits);
int Size = Context.getTypeSizeInChars(QTy).getQuantity();
if (Size <= 0)
return;
std::fill_n(Bits.begin() + Offset, Size,
(uint64_t(1) << Context.getCharWidth()) - 1);
}
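// Combine `Size` target bytes of `Bits`, starting at `Pos`, into a single
// mask, honoring the target's endianness; e.g. with CharWidth == 8 and
// little-endian layout, bytes {0xF8, 0x03} yield the mask 0x3F8.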
static uint64_t buildMultiCharMask(const SmallVectorImpl<uint64_t> &Bits,
int Pos, int Size, int CharWidth,
bool BigEndian) {
assert(Size > 0);
uint64_t Mask = 0;
if (BigEndian) {
for (auto P = Bits.begin() + Pos, E = Bits.begin() + Pos + Size; P != E;
++P)
Mask = (Mask << CharWidth) | *P;
} else {
auto P = Bits.begin() + Pos + Size, End = Bits.begin() + Pos;
do
Mask = (Mask << CharWidth) | *--P;
while (P != End);
}
return Mask;
}
// Emit code to clear the bits in a record that aren't part of any
// user-declared member, when the record is returned from a function.
llvm::Value *CodeGenFunction::EmitCMSEClearRecord(llvm::Value *Src,
llvm::IntegerType *ITy,
QualType QTy) {
assert(Src->getType() == ITy);
assert(ITy->getScalarSizeInBits() <= 64);
const llvm::DataLayout &DataLayout = CGM.getDataLayout();
int Size = DataLayout.getTypeStoreSize(ITy);
SmallVector<uint64_t, 4> Bits(Size);
setUsedBits(CGM, QTy->castAs<RecordType>(), 0, Bits);
int CharWidth = CGM.getContext().getCharWidth();
uint64_t Mask =
buildMultiCharMask(Bits, 0, Size, CharWidth, DataLayout.isBigEndian());
return Builder.CreateAnd(Src, Mask, "cmse.clear");
}
// Emit code to clear the bits in a record that aren't part of any
// user-declared member, when the record is passed as a function argument.
llvm::Value *CodeGenFunction::EmitCMSEClearRecord(llvm::Value *Src,
llvm::ArrayType *ATy,
QualType QTy) {
const llvm::DataLayout &DataLayout = CGM.getDataLayout();
int Size = DataLayout.getTypeStoreSize(ATy);
SmallVector<uint64_t, 16> Bits(Size);
setUsedBits(CGM, QTy->castAs<RecordType>(), 0, Bits);
// Clear each element of the LLVM array.
int CharWidth = CGM.getContext().getCharWidth();
int CharsPerElt =
ATy->getArrayElementType()->getScalarSizeInBits() / CharWidth;
int MaskIndex = 0;
llvm::Value *R = llvm::PoisonValue::get(ATy);
for (int I = 0, N = ATy->getArrayNumElements(); I != N; ++I) {
uint64_t Mask = buildMultiCharMask(Bits, MaskIndex, CharsPerElt, CharWidth,
DataLayout.isBigEndian());
MaskIndex += CharsPerElt;
llvm::Value *T0 = Builder.CreateExtractValue(Src, I);
llvm::Value *T1 = Builder.CreateAnd(T0, Mask, "cmse.clear");
R = Builder.CreateInsertValue(R, T1, I);
}
return R;
}
void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
bool EmitRetDbgLoc,
SourceLocation EndLoc) {
if (FI.isNoReturn()) {
// Noreturn functions don't return.
EmitUnreachable(EndLoc);
return;
}
if (CurCodeDecl && CurCodeDecl->hasAttr<NakedAttr>()) {
// Naked functions don't have epilogues.
Builder.CreateUnreachable();
return;
}
// Functions with no result always return void.
if (!ReturnValue.isValid()) {
Builder.CreateRetVoid();
return;
}
llvm::DebugLoc RetDbgLoc;
llvm::Value *RV = nullptr;
QualType RetTy = FI.getReturnType();
const ABIArgInfo &RetAI = FI.getReturnInfo();
switch (RetAI.getKind()) {
case ABIArgInfo::InAlloca:
// Aggregates get evaluated directly into the destination. Sometimes we
// need to return the sret value in a register, though.
assert(hasAggregateEvaluationKind(RetTy));
if (RetAI.getInAllocaSRet()) {
llvm::Function::arg_iterator EI = CurFn->arg_end();
--EI;
llvm::Value *ArgStruct = &*EI;
llvm::Value *SRet = Builder.CreateStructGEP(
FI.getArgStruct(), ArgStruct, RetAI.getInAllocaFieldIndex());
llvm::Type *Ty =
cast<llvm::GetElementPtrInst>(SRet)->getResultElementType();
RV = Builder.CreateAlignedLoad(Ty, SRet, getPointerAlign(), "sret");
}
break;
case ABIArgInfo::Indirect: {
auto AI = CurFn->arg_begin();
if (RetAI.isSRetAfterThis())
++AI;
switch (getEvaluationKind(RetTy)) {
case TEK_Complex: {
ComplexPairTy RT =
EmitLoadOfComplex(MakeAddrLValue(ReturnValue, RetTy), EndLoc);
EmitStoreOfComplex(RT, MakeNaturalAlignAddrLValue(&*AI, RetTy),
/*isInit*/ true);
break;
}
case TEK_Aggregate:
// Do nothing; aggregates get evaluated directly into the destination.
break;
case TEK_Scalar: {
LValueBaseInfo BaseInfo;
TBAAAccessInfo TBAAInfo;
CharUnits Alignment =
CGM.getNaturalTypeAlignment(RetTy, &BaseInfo, &TBAAInfo);
Address ArgAddr(&*AI, ConvertType(RetTy), Alignment);
LValue ArgVal =
LValue::MakeAddr(ArgAddr, RetTy, getContext(), BaseInfo, TBAAInfo);
EmitStoreOfScalar(
EmitLoadOfScalar(MakeAddrLValue(ReturnValue, RetTy), EndLoc), ArgVal,
/*isInit*/ true);
break;
}
}
break;
}
case ABIArgInfo::Extend:
case ABIArgInfo::Direct:
if (RetAI.getCoerceToType() == ConvertType(RetTy) &&
RetAI.getDirectOffset() == 0) {
// The internal return value temp will always have pointer-to-return-type
// type; just do a load.
// If there is a dominating store to ReturnValue, we can elide
// the load, zap the store, and usually zap the alloca.
if (llvm::StoreInst *SI =
findDominatingStoreToReturnValue(*this)) {
// Reuse the debug location from the store unless there is
// cleanup code to be emitted between the store and return
// instruction.
if (EmitRetDbgLoc && !AutoreleaseResult)
RetDbgLoc = SI->getDebugLoc();
// Get the stored value and nuke the now-dead store.
RV = SI->getValueOperand();
SI->eraseFromParent();
// Otherwise, we have to do a simple load.
} else {
RV = Builder.CreateLoad(ReturnValue);
}
} else {
// If the value is offset in memory, apply the offset now.
Address V = emitAddressAtOffset(*this, ReturnValue, RetAI);
RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), *this);
}
// In ARC, end functions that return a retainable type with a call
// to objc_autoreleaseReturnValue.
if (AutoreleaseResult) {
#ifndef NDEBUG
// Type::isObjCRetainableType has to be called on a QualType that hasn't
// been stripped of the typedefs, so we cannot use RetTy here. Get the
// original return type of FunctionDecl, CurCodeDecl, and BlockDecl from
// CurCodeDecl or BlockInfo.
QualType RT;
if (auto *FD = dyn_cast<FunctionDecl>(CurCodeDecl))
RT = FD->getReturnType();
else if (auto *MD = dyn_cast<ObjCMethodDecl>(CurCodeDecl))
RT = MD->getReturnType();
else if (isa<BlockDecl>(CurCodeDecl))
RT = BlockInfo->BlockExpression->getFunctionType()->getReturnType();
else
llvm_unreachable("Unexpected function/method type");
assert(getLangOpts().ObjCAutoRefCount &&
!FI.isReturnsRetained() &&
RT->isObjCRetainableType());
#endif
RV = emitAutoreleaseOfResult(*this, RV);
}
break;
case ABIArgInfo::Ignore:
break;
case ABIArgInfo::CoerceAndExpand: {
auto coercionType = RetAI.getCoerceAndExpandType();
// Load all of the coerced elements out into results.
llvm::SmallVector<llvm::Value*, 4> results;
Address addr = ReturnValue.withElementType(coercionType);
for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
auto coercedEltType = coercionType->getElementType(i);
if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType))
continue;
auto eltAddr = Builder.CreateStructGEP(addr, i);
auto elt = Builder.CreateLoad(eltAddr);
results.push_back(elt);
}
// If we have one result, it's the single direct result type.
if (results.size() == 1) {
RV = results[0];
// Otherwise, we need to make a first-class aggregate.
} else {
// Construct a return type that lacks padding elements.
llvm::Type *returnType = RetAI.getUnpaddedCoerceAndExpandType();
RV = llvm::PoisonValue::get(returnType);
for (unsigned i = 0, e = results.size(); i != e; ++i) {
RV = Builder.CreateInsertValue(RV, results[i], i);
}
}
break;
}
case ABIArgInfo::Expand:
case ABIArgInfo::IndirectAliased:
llvm_unreachable("Invalid ABI kind for return argument");
}
llvm::Instruction *Ret;
if (RV) {
if (CurFuncDecl && CurFuncDecl->hasAttr<CmseNSEntryAttr>()) {
// For certain return types, clear padding bits, as they may reveal
// sensitive information.
// Small struct/union types are passed as integers.
auto *ITy = dyn_cast<llvm::IntegerType>(RV->getType());
if (ITy != nullptr && isa<RecordType>(RetTy.getCanonicalType()))
RV = EmitCMSEClearRecord(RV, ITy, RetTy);
}
EmitReturnValueCheck(RV);
Ret = Builder.CreateRet(RV);
} else {
Ret = Builder.CreateRetVoid();
}
if (RetDbgLoc)
Ret->setDebugLoc(std::move(RetDbgLoc));
}
void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV) {
// A current decl may not be available when emitting vtable thunks.
if (!CurCodeDecl)
return;
// If the return block isn't reachable, neither is this check, so don't emit
// it.
if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty())
return;
ReturnsNonNullAttr *RetNNAttr = nullptr;
if (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute))
RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>();
if (!RetNNAttr && !requiresReturnValueNullabilityCheck())
return;
// Prefer the returns_nonnull attribute if it's present.
SourceLocation AttrLoc;
SanitizerMask CheckKind;
SanitizerHandler Handler;
if (RetNNAttr) {
assert(!requiresReturnValueNullabilityCheck() &&
"Cannot check nullability and the nonnull attribute");
AttrLoc = RetNNAttr->getLocation();
CheckKind = SanitizerKind::ReturnsNonnullAttribute;
Handler = SanitizerHandler::NonnullReturn;
} else {
if (auto *DD = dyn_cast<DeclaratorDecl>(CurCodeDecl))
if (auto *TSI = DD->getTypeSourceInfo())
if (auto FTL = TSI->getTypeLoc().getAsAdjusted<FunctionTypeLoc>())
AttrLoc = FTL.getReturnLoc().findNullabilityLoc();
CheckKind = SanitizerKind::NullabilityReturn;
Handler = SanitizerHandler::NullabilityReturn;
}
SanitizerScope SanScope(this);
// Make sure the "return" source location is valid. If we're checking a
// nullability annotation, make sure the preconditions for the check are met.
llvm::BasicBlock *Check = createBasicBlock("nullcheck");
llvm::BasicBlock *NoCheck = createBasicBlock("no.nullcheck");
llvm::Value *SLocPtr = Builder.CreateLoad(ReturnLocation, "return.sloc.load");
llvm::Value *CanNullCheck = Builder.CreateIsNotNull(SLocPtr);
if (requiresReturnValueNullabilityCheck())
CanNullCheck =
Builder.CreateAnd(CanNullCheck, RetValNullabilityPrecondition);
Builder.CreateCondBr(CanNullCheck, Check, NoCheck);
EmitBlock(Check);
// Now do the null check.
llvm::Value *Cond = Builder.CreateIsNotNull(RV);
llvm::Constant *StaticData[] = {EmitCheckSourceLocation(AttrLoc)};
llvm::Value *DynamicData[] = {SLocPtr};
EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, DynamicData);
EmitBlock(NoCheck);
#ifndef NDEBUG
// The return location should not be used after the check has been emitted.
ReturnLocation = Address::invalid();
#endif
}
static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) {
const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory;
}
static AggValueSlot createPlaceholderSlot(CodeGenFunction &CGF,
QualType Ty) {
// FIXME: Generate IR in one pass, rather than going back and fixing up these
// placeholders.
llvm::Type *IRTy = CGF.ConvertTypeForMem(Ty);
llvm::Type *IRPtrTy = llvm::PointerType::getUnqual(CGF.getLLVMContext());
llvm::Value *Placeholder = llvm::PoisonValue::get(IRPtrTy);
// FIXME: When we generate this IR in one pass, we shouldn't need
// this win32-specific alignment hack.
CharUnits Align = CharUnits::fromQuantity(4);
Placeholder = CGF.Builder.CreateAlignedLoad(IRPtrTy, Placeholder, Align);
return AggValueSlot::forAddr(Address(Placeholder, IRTy, Align),
Ty.getQualifiers(),
AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased,
AggValueSlot::DoesNotOverlap);
}
void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
const VarDecl *param,
SourceLocation loc) {
// StartFunction converted the ABI-lowered parameter(s) into a
// local alloca. We need to turn that into an r-value suitable
// for EmitCall.
Address local = GetAddrOfLocalVar(param);
QualType type = param->getType();
// GetAddrOfLocalVar returns a pointer-to-pointer for references,
// but the argument needs to be the original pointer.
if (type->isReferenceType()) {
args.add(RValue::get(Builder.CreateLoad(local)), type);
// In ARC, move out of consumed arguments so that the release cleanup
// entered by StartFunction doesn't cause an over-release. This isn't
// optimal -O0 code generation, but it should get cleaned up when
// optimization is enabled. This also assumes that delegate calls are
// performed exactly once for a set of arguments, but that should be safe.
} else if (getLangOpts().ObjCAutoRefCount &&
param->hasAttr<NSConsumedAttr>() &&
type->isObjCRetainableType()) {
llvm::Value *ptr = Builder.CreateLoad(local);
auto null =
llvm::ConstantPointerNull::get(cast<llvm::PointerType>(ptr->getType()));
Builder.CreateStore(null, local);
args.add(RValue::get(ptr), type);
// For the most part, we just need to load the alloca, except that
// aggregate r-values are actually pointers to temporaries.
} else {
args.add(convertTempToRValue(local, type, loc), type);
}
// Deactivate the cleanup for the callee-destructed param that was pushed.
if (type->isRecordType() && !CurFuncIsThunk &&
type->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee() &&
param->needsDestruction(getContext())) {
EHScopeStack::stable_iterator cleanup =
CalleeDestructedParamCleanups.lookup(cast<ParmVarDecl>(param));
assert(cleanup.isValid() &&
"cleanup for callee-destructed param not recorded");
// This unreachable is a temporary marker which will be removed later.
llvm::Instruction *isActive = Builder.CreateUnreachable();
args.addArgCleanupDeactivation(cleanup, isActive);
}
}
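/// Whether the given pointer is statically known to be null.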
static bool isProvablyNull(llvm::Value *addr) {
return llvm::isa_and_nonnull<llvm::ConstantPointerNull>(addr);
}
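/// Whether the given address can be proven to be non-null.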
static bool isProvablyNonNull(Address Addr, CodeGenFunction &CGF) {
return llvm::isKnownNonZero(Addr.getBasePointer(), CGF.CGM.getDataLayout());
}
/// Emit the actual writing-back of a writeback.
static void emitWriteback(CodeGenFunction &CGF,
const CallArgList::Writeback &writeback) {
const LValue &srcLV = writeback.Source;
Address srcAddr = srcLV.getAddress();
assert(!isProvablyNull(srcAddr.getBasePointer()) &&
"shouldn't have writeback for provably null argument");
llvm::BasicBlock *contBB = nullptr;
// If the argument wasn't provably non-null, we need to null check
// before doing the store.
bool provablyNonNull = isProvablyNonNull(srcAddr, CGF);
if (!provablyNonNull) {
llvm::BasicBlock *writebackBB = CGF.createBasicBlock("icr.writeback");
contBB = CGF.createBasicBlock("icr.done");
llvm::Value *isNull = CGF.Builder.CreateIsNull(srcAddr, "icr.isnull");
CGF.Builder.CreateCondBr(isNull, contBB, writebackBB);
CGF.EmitBlock(writebackBB);
}
// Load the value to writeback.
llvm::Value *value = CGF.Builder.CreateLoad(writeback.Temporary);
// Cast it back, in case we're writing an id to a Foo* or something.
value = CGF.Builder.CreateBitCast(value, srcAddr.getElementType(),
"icr.writeback-cast");
// Perform the writeback.
// If we have a "to use" value, it's something we need to emit a use
// of. This has to be carefully threaded in: if it's done after the
// release it's potentially undefined behavior (and the optimizer
// will ignore it), and if it happens before the retain then the
// optimizer could move the release there.
if (writeback.ToUse) {
assert(srcLV.getObjCLifetime() == Qualifiers::OCL_Strong);
// Retain the new value. No need to block-copy here: the block's
// being passed up the stack.
value = CGF.EmitARCRetainNonBlock(value);
// Emit the intrinsic use here.
CGF.EmitARCIntrinsicUse(writeback.ToUse);
// Load the old value (primitively).
llvm::Value *oldValue = CGF.EmitLoadOfScalar(srcLV, SourceLocation());
// Put the new value in place (primitively).
CGF.EmitStoreOfScalar(value, srcLV, /*init*/ false);
// Release the old value.
CGF.EmitARCRelease(oldValue, srcLV.isARCPreciseLifetime());
// Otherwise, we can just do a normal lvalue store.
} else {
CGF.EmitStoreThroughLValue(RValue::get(value), srcLV);
}
// Jump to the continuation block.
if (!provablyNonNull)
CGF.EmitBlock(contBB);
}
static void emitWritebacks(CodeGenFunction &CGF,
const CallArgList &args) {
for (const auto &I : args.writebacks())
emitWriteback(CGF, I);
}
static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF,
const CallArgList &CallArgs) {
ArrayRef<CallArgList::CallArgCleanup> Cleanups =
CallArgs.getCleanupsToDeactivate();
// Iterate in reverse to increase the likelihood of popping the cleanup.
for (const auto &I : llvm::reverse(Cleanups)) {
CGF.DeactivateCleanupBlock(I.Cleanup, I.IsActiveIP);
I.IsActiveIP->eraseFromParent();
}
}
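/// If \p E is a (possibly parenthesized) unary address-of expression, return
/// its operand; otherwise return null.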
static const Expr *maybeGetUnaryAddrOfOperand(const Expr *E) {
if (const UnaryOperator *uop = dyn_cast<UnaryOperator>(E->IgnoreParens()))
if (uop->getOpcode() == UO_AddrOf)
return uop->getSubExpr();
return nullptr;
}
/// Emit an argument that's being passed call-by-writeback. That is,
/// we are passing the address of an __autoreleased temporary; it
/// might be copy-initialized with the current value of the given
/// address, but it will definitely be copied out of after the call.
static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
const ObjCIndirectCopyRestoreExpr *CRE) {
LValue srcLV;
// Make an optimistic effort to emit the address as an l-value.
// This can fail if the argument expression is more complicated.
if (const Expr *lvExpr = maybeGetUnaryAddrOfOperand(CRE->getSubExpr())) {
srcLV = CGF.EmitLValue(lvExpr);
// Otherwise, just emit it as a scalar.
} else {
Address srcAddr = CGF.EmitPointerWithAlignment(CRE->getSubExpr());
QualType srcAddrType =
CRE->getSubExpr()->getType()->castAs<PointerType>()->getPointeeType();
srcLV = CGF.MakeAddrLValue(srcAddr, srcAddrType);
}
Address srcAddr = srcLV.getAddress();
// The dest and src types don't necessarily match in LLVM terms
// because of the crazy ObjC compatibility rules.
llvm::PointerType *destType =
cast<llvm::PointerType>(CGF.ConvertType(CRE->getType()));
llvm::Type *destElemType =
CGF.ConvertTypeForMem(CRE->getType()->getPointeeType());
// If the address is a constant null, just pass the appropriate null.
if (isProvablyNull(srcAddr.getBasePointer())) {
args.add(RValue::get(llvm::ConstantPointerNull::get(destType)),
CRE->getType());
return;
}
// Create the temporary.
Address temp =
CGF.CreateTempAlloca(destElemType, CGF.getPointerAlign(), "icr.temp");
// Loading an l-value can introduce a cleanup if the l-value is __weak,
// and that cleanup will be conditional if we can't prove that the l-value
// isn't null, so we need to register a dominating point so that the cleanups
// system will make valid IR.
CodeGenFunction::ConditionalEvaluation condEval(CGF);
// Zero-initialize it if we're not doing a copy-initialization.
bool shouldCopy = CRE->shouldCopy();
if (!shouldCopy) {
llvm::Value *null =
llvm::ConstantPointerNull::get(cast<llvm::PointerType>(destElemType));
CGF.Builder.CreateStore(null, temp);
}
llvm::BasicBlock *contBB = nullptr;
llvm::BasicBlock *originBB = nullptr;
// If the address is *not* known to be non-null, we need to switch.
llvm::Value *finalArgument;
bool provablyNonNull = isProvablyNonNull(srcAddr, CGF);
if (provablyNonNull) {
finalArgument = temp.emitRawPointer(CGF);
} else {
llvm::Value *isNull = CGF.Builder.CreateIsNull(srcAddr, "icr.isnull");
finalArgument = CGF.Builder.CreateSelect(
isNull, llvm::ConstantPointerNull::get(destType),
temp.emitRawPointer(CGF), "icr.argument");
// If we need to copy, then the load has to be conditional, which
// means we need control flow.
if (shouldCopy) {
originBB = CGF.Builder.GetInsertBlock();
contBB = CGF.createBasicBlock("icr.cont");
llvm::BasicBlock *copyBB = CGF.createBasicBlock("icr.copy");
CGF.Builder.CreateCondBr(isNull, contBB, copyBB);
CGF.EmitBlock(copyBB);
condEval.begin(CGF);
}
}
llvm::Value *valueToUse = nullptr;
// Perform a copy if necessary.
if (shouldCopy) {
RValue srcRV = CGF.EmitLoadOfLValue(srcLV, SourceLocation());
assert(srcRV.isScalar());
llvm::Value *src = srcRV.getScalarVal();
src = CGF.Builder.CreateBitCast(src, destElemType, "icr.cast");
// Use an ordinary store, not a store-to-lvalue.
CGF.Builder.CreateStore(src, temp);
// If optimization is enabled, and the value was held in a
// __strong variable, we need to tell the optimizer that this
// value has to stay alive until we're doing the store back.
// This is because the temporary is effectively unretained,
// and so otherwise we can violate the high-level semantics.
if (CGF.CGM.getCodeGenOpts().OptimizationLevel != 0 &&
srcLV.getObjCLifetime() == Qualifiers::OCL_Strong) {
valueToUse = src;
}
}
// Finish the control flow if we needed it.
if (shouldCopy && !provablyNonNull) {
llvm::BasicBlock *copyBB = CGF.Builder.GetInsertBlock();
CGF.EmitBlock(contBB);
// Make a phi for the value to intrinsically use.
if (valueToUse) {
llvm::PHINode *phiToUse = CGF.Builder.CreatePHI(valueToUse->getType(), 2,
"icr.to-use");
phiToUse->addIncoming(valueToUse, copyBB);
phiToUse->addIncoming(llvm::UndefValue::get(valueToUse->getType()),
originBB);
valueToUse = phiToUse;
}
condEval.end(CGF);
}
args.addWriteback(srcLV, temp, valueToUse);
args.add(RValue::get(finalArgument), CRE->getType());
}
void CallArgList::allocateArgumentMemory(CodeGenFunction &CGF) {
assert(!StackBase);
// Save the stack.
StackBase = CGF.Builder.CreateStackSave("inalloca.save");
}
void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const {
if (StackBase) {
// Restore the stack after the call.
CGF.Builder.CreateStackRestore(StackBase);
}
}
void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType,
SourceLocation ArgLoc,
AbstractCallee AC,
unsigned ParmNum) {
if (!AC.getDecl() || !(SanOpts.has(SanitizerKind::NonnullAttribute) ||
SanOpts.has(SanitizerKind::NullabilityArg)))
return;
// The param decl may be missing in a variadic function.
auto PVD = ParmNum < AC.getNumParams() ? AC.getParamDecl(ParmNum) : nullptr;
unsigned ArgNo = PVD ? PVD->getFunctionScopeIndex() : ParmNum;
// Prefer the nonnull attribute if it's present.
const NonNullAttr *NNAttr = nullptr;
if (SanOpts.has(SanitizerKind::NonnullAttribute))
NNAttr = getNonNullAttr(AC.getDecl(), PVD, ArgType, ArgNo);
bool CanCheckNullability = false;
if (SanOpts.has(SanitizerKind::NullabilityArg) && !NNAttr && PVD &&
!PVD->getType()->isRecordType()) {
auto Nullability = PVD->getType()->getNullability();
CanCheckNullability = Nullability &&
*Nullability == NullabilityKind::NonNull &&
PVD->getTypeSourceInfo();
}
if (!NNAttr && !CanCheckNullability)
return;
SourceLocation AttrLoc;
SanitizerMask CheckKind;
SanitizerHandler Handler;
if (NNAttr) {
AttrLoc = NNAttr->getLocation();
CheckKind = SanitizerKind::NonnullAttribute;
Handler = SanitizerHandler::NonnullArg;
} else {
AttrLoc = PVD->getTypeSourceInfo()->getTypeLoc().findNullabilityLoc();
CheckKind = SanitizerKind::NullabilityArg;
Handler = SanitizerHandler::NullabilityArg;
}
SanitizerScope SanScope(this);
llvm::Value *Cond = EmitNonNullRValueCheck(RV, ArgType);
llvm::Constant *StaticData[] = {
EmitCheckSourceLocation(ArgLoc), EmitCheckSourceLocation(AttrLoc),
llvm::ConstantInt::get(Int32Ty, ArgNo + 1),
};
EmitCheck(std::make_pair(Cond, CheckKind), Handler, StaticData, std::nullopt);
}
void CodeGenFunction::EmitNonNullArgCheck(Address Addr, QualType ArgType,
SourceLocation ArgLoc,
AbstractCallee AC, unsigned ParmNum) {
if (!AC.getDecl() || !(SanOpts.has(SanitizerKind::NonnullAttribute) ||
SanOpts.has(SanitizerKind::NullabilityArg)))
return;
EmitNonNullArgCheck(RValue::get(Addr, *this), ArgType, ArgLoc, AC, ParmNum);
}
// Check if the call is going to use the inalloca convention. This needs to
// agree with CGFunctionInfo::usesInAlloca. The CGFunctionInfo is arranged
// later, so we can't check it directly.
static bool hasInAllocaArgs(CodeGenModule &CGM, CallingConv ExplicitCC,
ArrayRef<QualType> ArgTypes) {
// The Swift calling conventions don't go through the target-specific
// argument classification, so they never use inalloca.
// TODO: Consider limiting inalloca use to only calling conventions supported
// by MSVC.
if (ExplicitCC == CC_Swift || ExplicitCC == CC_SwiftAsync)
return false;
if (!CGM.getTarget().getCXXABI().isMicrosoft())
return false;
return llvm::any_of(ArgTypes, [&](QualType Ty) {
return isInAllocaArgument(CGM.getCXXABI(), Ty);
});
}
#ifndef NDEBUG
// Determine whether the given argument is an Objective-C method
// that may have type parameters in its signature.
static bool isObjCMethodWithTypeParams(const ObjCMethodDecl *method) {
const DeclContext *dc = method->getDeclContext();
if (const ObjCInterfaceDecl *classDecl = dyn_cast<ObjCInterfaceDecl>(dc)) {
return classDecl->getTypeParamListAsWritten();
}
if (const ObjCCategoryDecl *catDecl = dyn_cast<ObjCCategoryDecl>(dc)) {
return catDecl->getTypeParamList();
}
return false;
}
#endif
/// EmitCallArgs - Emit call arguments for a function.
void CodeGenFunction::EmitCallArgs(
CallArgList &Args, PrototypeWrapper Prototype,
llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange,
AbstractCallee AC, unsigned ParamsToSkip, EvaluationOrder Order) {
SmallVector<QualType, 16> ArgTypes;
assert((ParamsToSkip == 0 || Prototype.P) &&
"Can't skip parameters if type info is not provided");
// This variable only captures *explicitly* written conventions, not those
// applied by default via command line flags or target defaults, such as
// thiscall, aapcs, stdcall via -mrtd, etc. Computing that correctly would
// require knowing if this is a C++ instance method or being able to see
// unprototyped FunctionTypes.
CallingConv ExplicitCC = CC_C;
// First, if a prototype was provided, use those argument types.
bool IsVariadic = false;
if (Prototype.P) {
const auto *MD = Prototype.P.dyn_cast<const ObjCMethodDecl *>();
if (MD) {
IsVariadic = MD->isVariadic();
ExplicitCC = getCallingConventionForDecl(
MD, CGM.getTarget().getTriple().isOSWindows());
ArgTypes.assign(MD->param_type_begin() + ParamsToSkip,
MD->param_type_end());
} else {
const auto *FPT = Prototype.P.get<const FunctionProtoType *>();
IsVariadic = FPT->isVariadic();
ExplicitCC = FPT->getExtInfo().getCC();
ArgTypes.assign(FPT->param_type_begin() + ParamsToSkip,
FPT->param_type_end());
}
#ifndef NDEBUG
// Check that the prototyped types match the argument expression types.
bool isGenericMethod = MD && isObjCMethodWithTypeParams(MD);
CallExpr::const_arg_iterator Arg = ArgRange.begin();
for (QualType Ty : ArgTypes) {
assert(Arg != ArgRange.end() && "Running over edge of argument list!");
assert(
(isGenericMethod || Ty->isVariablyModifiedType() ||
Ty.getNonReferenceType()->isObjCRetainableType() ||
getContext()
.getCanonicalType(Ty.getNonReferenceType())
.getTypePtr() ==
getContext().getCanonicalType((*Arg)->getType()).getTypePtr()) &&
"type mismatch in call argument!");
++Arg;
}
// Either we've emitted all the call args, or we have a call to a variadic
// function.
assert((Arg == ArgRange.end() || IsVariadic) &&
"Extra arguments in non-variadic function!");
#endif
}
// If we still have any arguments, emit them using the type of the argument.
for (auto *A : llvm::drop_begin(ArgRange, ArgTypes.size()))
ArgTypes.push_back(IsVariadic ? getVarArgType(A) : A->getType());
assert((int)ArgTypes.size() == (ArgRange.end() - ArgRange.begin()));
// We must evaluate arguments from right to left in the MS C++ ABI,
// because arguments are destroyed left to right in the callee. As a special
// case, there are certain language constructs that require left-to-right
// evaluation, and in those cases we consider the evaluation order requirement
// to trump the "destruction order is reverse construction order" guarantee.
bool LeftToRight =
CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()
? Order == EvaluationOrder::ForceLeftToRight
: Order != EvaluationOrder::ForceRightToLeft;
auto MaybeEmitImplicitObjectSize = [&](unsigned I, const Expr *Arg,
RValue EmittedArg) {
if (!AC.hasFunctionDecl() || I >= AC.getNumParams())
return;
auto *PS = AC.getParamDecl(I)->getAttr<PassObjectSizeAttr>();
if (PS == nullptr)
return;
const auto &Context = getContext();
auto SizeTy = Context.getSizeType();
auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
assert(EmittedArg.getScalarVal() && "We emitted nothing for the arg?");
llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T,
EmittedArg.getScalarVal(),
PS->isDynamic());
Args.add(RValue::get(V), SizeTy);
// If we're emitting args in reverse, be sure to do so with
// pass_object_size, as well.
if (!LeftToRight)
std::swap(Args.back(), *(&Args.back() - 1));
};
// Insert a stack save if we're going to need any inalloca args.
if (hasInAllocaArgs(CGM, ExplicitCC, ArgTypes)) {
assert(getTarget().getTriple().getArch() == llvm::Triple::x86 &&
"inalloca only supported on x86");
Args.allocateArgumentMemory(*this);
}
// Evaluate each argument in the appropriate order.
size_t CallArgsStart = Args.size();
for (unsigned I = 0, E = ArgTypes.size(); I != E; ++I) {
unsigned Idx = LeftToRight ? I : E - I - 1;
CallExpr::const_arg_iterator Arg = ArgRange.begin() + Idx;
unsigned InitialArgSize = Args.size();
// If *Arg is an ObjCIndirectCopyRestoreExpr, check that either the types of
// the argument and parameter match or the objc method is parameterized.
assert((!isa<ObjCIndirectCopyRestoreExpr>(*Arg) ||
getContext().hasSameUnqualifiedType((*Arg)->getType(),
ArgTypes[Idx]) ||
(isa<ObjCMethodDecl>(AC.getDecl()) &&
isObjCMethodWithTypeParams(cast<ObjCMethodDecl>(AC.getDecl())))) &&
"Argument and parameter types don't match");
EmitCallArg(Args, *Arg, ArgTypes[Idx]);
// In particular, we depend on it being the last arg in Args, and the
// objectsize bits depend on there only being one arg if !LeftToRight.
assert(InitialArgSize + 1 == Args.size() &&
"The code below depends on only adding one arg per EmitCallArg");
(void)InitialArgSize;
// Since pointer arguments are never emitted as LValues, it is safe to emit
// the non-null argument check for r-values only.
if (!Args.back().hasLValue()) {
RValue RVArg = Args.back().getKnownRValue();
EmitNonNullArgCheck(RVArg, ArgTypes[Idx], (*Arg)->getExprLoc(), AC,
ParamsToSkip + Idx);
// @llvm.objectsize should never have side-effects and shouldn't need
// destruction/cleanups, so we can safely "emit" it after its arg,
// regardless of right-to-leftness.
MaybeEmitImplicitObjectSize(Idx, *Arg, RVArg);
}
}
if (!LeftToRight) {
// Un-reverse the arguments we just evaluated so they match up with the LLVM
// IR function.
std::reverse(Args.begin() + CallArgsStart, Args.end());
}
}
namespace {
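/// Cleanup to destroy an argument that was constructed for a callee-destroyed
/// parameter but never actually passed, e.g. because unwinding began before
/// the call was made.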
struct DestroyUnpassedArg final : EHScopeStack::Cleanup {
DestroyUnpassedArg(Address Addr, QualType Ty)
: Addr(Addr), Ty(Ty) {}
Address Addr;
QualType Ty;
void Emit(CodeGenFunction &CGF, Flags flags) override {
QualType::DestructionKind DtorKind = Ty.isDestructedType();
if (DtorKind == QualType::DK_cxx_destructor) {
const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
assert(!Dtor->isTrivial());
CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
/*Delegating=*/false, Addr, Ty);
} else {
CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Ty));
}
}
};
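/// RAII object that suppresses debug locations while a default argument
/// expression is emitted, then restores them.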
struct DisableDebugLocationUpdates {
CodeGenFunction &CGF;
bool disabledDebugInfo;
DisableDebugLocationUpdates(CodeGenFunction &CGF, const Expr *E) : CGF(CGF) {
if ((disabledDebugInfo = isa<CXXDefaultArgExpr>(E) && CGF.getDebugInfo()))
CGF.disableDebugInfo();
}
~DisableDebugLocationUpdates() {
if (disabledDebugInfo)
CGF.enableDebugInfo();
}
};
} // end anonymous namespace
RValue CallArg::getRValue(CodeGenFunction &CGF) const {
if (!HasLV)
return RV;
LValue Copy = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty), Ty);
CGF.EmitAggregateCopy(Copy, LV, Ty, AggValueSlot::DoesNotOverlap,
LV.isVolatile());
IsUsed = true;
return RValue::getAggregate(Copy.getAddress());
}
void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const {
LValue Dst = CGF.MakeAddrLValue(Addr, Ty);
if (!HasLV && RV.isScalar())
CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*isInit=*/true);
else if (!HasLV && RV.isComplex())
CGF.EmitStoreOfComplex(RV.getComplexVal(), Dst, /*init=*/true);
else {
auto Addr = HasLV ? LV.getAddress() : RV.getAggregateAddress();
LValue SrcLV = CGF.MakeAddrLValue(Addr, Ty);
// We assume that call args are never copied into subobjects.
CGF.EmitAggregateCopy(Dst, SrcLV, Ty, AggValueSlot::DoesNotOverlap,
HasLV ? LV.isVolatileQualified()
: RV.isVolatileQualified());
}
IsUsed = true;
}
void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
QualType type) {
DisableDebugLocationUpdates Dis(*this, E);
if (const ObjCIndirectCopyRestoreExpr *CRE
= dyn_cast<ObjCIndirectCopyRestoreExpr>(E)) {
assert(getLangOpts().ObjCAutoRefCount);
return emitWritebackArg(*this, args, CRE);
}
assert(type->isReferenceType() == E->isGLValue() &&
"reference binding to unmaterialized r-value!");
if (E->isGLValue()) {
assert(E->getObjectKind() == OK_Ordinary);
return args.add(EmitReferenceBindingToExpr(E), type);
}
bool HasAggregateEvalKind = hasAggregateEvaluationKind(type);
// In the Microsoft C++ ABI, aggregate arguments are destructed by the callee.
// However, we still have to push an EH-only cleanup in case we unwind before
// we make it to the call.
if (type->isRecordType() &&
type->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) {
// If we're using inalloca, use the argument memory. Otherwise, use a
// temporary.
AggValueSlot Slot = args.isUsingInAlloca()
? createPlaceholderSlot(*this, type) : CreateAggTemp(type, "agg.tmp");
bool DestroyedInCallee = true, NeedsCleanup = true;
if (const auto *RD = type->getAsCXXRecordDecl())
DestroyedInCallee = RD->hasNonTrivialDestructor();
else
NeedsCleanup = type.isDestructedType();
if (DestroyedInCallee)
Slot.setExternallyDestructed();
EmitAggExpr(E, Slot);
RValue RV = Slot.asRValue();
args.add(RV, type);
if (DestroyedInCallee && NeedsCleanup) {
// Create a no-op GEP between the placeholder and the cleanup so we can
// RAUW it successfully. It also serves as a marker of the first
// instruction where the cleanup is active.
pushFullExprCleanup<DestroyUnpassedArg>(NormalAndEHCleanup,
Slot.getAddress(), type);
// This unreachable is a temporary marker which will be removed later.
llvm::Instruction *IsActive =
Builder.CreateFlagLoad(llvm::Constant::getNullValue(Int8PtrTy));
args.addArgCleanupDeactivation(EHStack.stable_begin(), IsActive);
}
return;
}
if (HasAggregateEvalKind && isa<ImplicitCastExpr>(E) &&
cast<CastExpr>(E)->getCastKind() == CK_LValueToRValue &&
!type->isArrayParameterType()) {
LValue L = EmitLValue(cast<CastExpr>(E)->getSubExpr());
assert(L.isSimple());
args.addUncopiedAggregate(L, type);
return;
}
args.add(EmitAnyExprToTemp(E), type);
}
QualType CodeGenFunction::getVarArgType(const Expr *Arg) {
// System headers on Windows define NULL to 0 instead of 0LL on Win64. MSVC
// implicitly widens null pointer constants that are arguments to varargs
// functions to pointer-sized ints.
if (!getTarget().getTriple().isOSWindows())
return Arg->getType();
if (Arg->getType()->isIntegerType() &&
getContext().getTypeSize(Arg->getType()) <
getContext().getTargetInfo().getPointerWidth(LangAS::Default) &&
Arg->isNullPointerConstant(getContext(),
Expr::NPC_ValueDependentIsNotNull)) {
return getContext().getIntPtrType();
}
return Arg->getType();
}
// In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
// optimizer it can aggressively ignore unwind edges.
void
CodeGenFunction::AddObjCARCExceptionMetadata(llvm::Instruction *Inst) {
if (CGM.getCodeGenOpts().OptimizationLevel != 0 &&
!CGM.getCodeGenOpts().ObjCAutoRefCountExceptions)
Inst->setMetadata("clang.arc.no_objc_arc_exceptions",
CGM.getNoObjCARCExceptionsMetadata());
}
/// Emits a call to the given no-arguments nounwind runtime function.
llvm::CallInst *
CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee,
const llvm::Twine &name) {
return EmitNounwindRuntimeCall(callee, ArrayRef<llvm::Value *>(), name);
}
/// Emits a call to the given nounwind runtime function.
llvm::CallInst *
CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee,
ArrayRef<Address> args,
const llvm::Twine &name) {
SmallVector<llvm::Value *, 3> values;
for (auto arg : args)
values.push_back(arg.emitRawPointer(*this));
return EmitNounwindRuntimeCall(callee, values, name);
}
llvm::CallInst *
CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee,
ArrayRef<llvm::Value *> args,
const llvm::Twine &name) {
llvm::CallInst *call = EmitRuntimeCall(callee, args, name);
call->setDoesNotThrow();
return call;
}
/// Emits a simple call (never an invoke) to the given no-arguments
/// runtime function.
llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee,
const llvm::Twine &name) {
return EmitRuntimeCall(callee, std::nullopt, name);
}
// Calls which may throw must have operand bundles indicating which funclet
// they are nested within.
SmallVector<llvm::OperandBundleDef, 1>
CodeGenFunction::getBundlesForFunclet(llvm::Value *Callee) {
// There is no need for a funclet operand bundle if we aren't inside a
// funclet.
if (!CurrentFuncletPad)
return (SmallVector<llvm::OperandBundleDef, 1>());
// Skip intrinsics which cannot throw (as long as they don't lower into
// regular function calls in the course of IR transformations).
if (auto *CalleeFn = dyn_cast<llvm::Function>(Callee->stripPointerCasts())) {
if (CalleeFn->isIntrinsic() && CalleeFn->doesNotThrow()) {
auto IID = CalleeFn->getIntrinsicID();
if (!llvm::IntrinsicInst::mayLowerToFunctionCall(IID))
return (SmallVector<llvm::OperandBundleDef, 1>());
}
}
SmallVector<llvm::OperandBundleDef, 1> BundleList;
BundleList.emplace_back("funclet", CurrentFuncletPad);
return BundleList;
}
/// Emits a simple call (never an invoke) to the given runtime function.
llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee,
ArrayRef<llvm::Value *> args,
const llvm::Twine &name) {
llvm::CallInst *call = Builder.CreateCall(
callee, args, getBundlesForFunclet(callee.getCallee()), name);
call->setCallingConv(getRuntimeCC());
if (CGM.shouldEmitConvergenceTokens() && call->isConvergent())
return addControlledConvergenceToken(call);
return call;
}
/// Emits a call or invoke to the given noreturn runtime function.
void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(
llvm::FunctionCallee callee, ArrayRef<llvm::Value *> args) {
SmallVector<llvm::OperandBundleDef, 1> BundleList =
getBundlesForFunclet(callee.getCallee());
if (getInvokeDest()) {
llvm::InvokeInst *invoke =
Builder.CreateInvoke(callee,
getUnreachableBlock(),
getInvokeDest(),
args,
BundleList);
invoke->setDoesNotReturn();
invoke->setCallingConv(getRuntimeCC());
} else {
llvm::CallInst *call = Builder.CreateCall(callee, args, BundleList);
call->setDoesNotReturn();
call->setCallingConv(getRuntimeCC());
Builder.CreateUnreachable();
}
}
/// Emits a call or invoke instruction to the given nullary runtime function.
llvm::CallBase *
CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee,
const Twine &name) {
return EmitRuntimeCallOrInvoke(callee, std::nullopt, name);
}
/// Emits a call or invoke instruction to the given runtime function.
llvm::CallBase *
CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee,
ArrayRef<llvm::Value *> args,
const Twine &name) {
llvm::CallBase *call = EmitCallOrInvoke(callee, args, name);
call->setCallingConv(getRuntimeCC());
return call;
}
/// Emits a call or invoke instruction to the given function, depending
/// on the current state of the EH stack.
llvm::CallBase *CodeGenFunction::EmitCallOrInvoke(llvm::FunctionCallee Callee,
ArrayRef<llvm::Value *> Args,
const Twine &Name) {
llvm::BasicBlock *InvokeDest = getInvokeDest();
SmallVector<llvm::OperandBundleDef, 1> BundleList =
getBundlesForFunclet(Callee.getCallee());
llvm::CallBase *Inst;
if (!InvokeDest)
Inst = Builder.CreateCall(Callee, Args, BundleList, Name);
else {
llvm::BasicBlock *ContBB = createBasicBlock("invoke.cont");
Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, BundleList,
Name);
EmitBlock(ContBB);
}
// In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
// optimizer it can aggressively ignore unwind edges.
if (CGM.getLangOpts().ObjCAutoRefCount)
AddObjCARCExceptionMetadata(Inst);
return Inst;
}
void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old,
llvm::Value *New) {
DeferredReplacements.push_back(
std::make_pair(llvm::WeakTrackingVH(Old), New));
}
namespace {
/// Specify the given \p NewAlign as the alignment of the return value
/// attribute. If such an attribute already exists, keep the larger of the two
/// alignments.
[[nodiscard]] llvm::AttributeList
maybeRaiseRetAlignmentAttribute(llvm::LLVMContext &Ctx,
const llvm::AttributeList &Attrs,
llvm::Align NewAlign) {
llvm::Align CurAlign = Attrs.getRetAlignment().valueOrOne();
if (CurAlign >= NewAlign)
return Attrs;
llvm::Attribute AlignAttr = llvm::Attribute::getWithAlignment(Ctx, NewAlign);
return Attrs.removeRetAttribute(Ctx, llvm::Attribute::AttrKind::Alignment)
.addRetAttribute(Ctx, AlignAttr);
}
template <typename AlignedAttrTy> class AbstractAssumeAlignedAttrEmitter {
protected:
CodeGenFunction &CGF;
/// We do nothing if this is, or becomes, nullptr.
const AlignedAttrTy *AA = nullptr;
llvm::Value *Alignment = nullptr; // May or may not be a constant.
llvm::ConstantInt *OffsetCI = nullptr; // Constant, hopefully zero.
AbstractAssumeAlignedAttrEmitter(CodeGenFunction &CGF_, const Decl *FuncDecl)
: CGF(CGF_) {
if (!FuncDecl)
return;
AA = FuncDecl->getAttr<AlignedAttrTy>();
}
public:
/// If we can, materialize the alignment as an attribute on return value.
[[nodiscard]] llvm::AttributeList
TryEmitAsCallSiteAttribute(const llvm::AttributeList &Attrs) {
if (!AA || OffsetCI || CGF.SanOpts.has(SanitizerKind::Alignment))
return Attrs;
const auto *AlignmentCI = dyn_cast<llvm::ConstantInt>(Alignment);
if (!AlignmentCI)
return Attrs;
// We may legitimately have non-power-of-2 alignment here.
// If so, this is UB land, emit it via `@llvm.assume` instead.
if (!AlignmentCI->getValue().isPowerOf2())
return Attrs;
llvm::AttributeList NewAttrs = maybeRaiseRetAlignmentAttribute(
CGF.getLLVMContext(), Attrs,
llvm::Align(
AlignmentCI->getLimitedValue(llvm::Value::MaximumAlignment)));
AA = nullptr; // We're done. Disallow doing anything else.
return NewAttrs;
}
/// Emit alignment assumption.
/// This is a general fallback that we take if either there is an offset,
/// or the alignment is variable or we are sanitizing for alignment.
void EmitAsAnAssumption(SourceLocation Loc, QualType RetTy, RValue &Ret) {
if (!AA)
return;
CGF.emitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc,
AA->getLocation(), Alignment, OffsetCI);
AA = nullptr; // We're done. Disallow doing anything else.
}
};
/// Helper data structure to emit `AssumeAlignedAttr`.
class AssumeAlignedAttrEmitter final
: public AbstractAssumeAlignedAttrEmitter<AssumeAlignedAttr> {
public:
AssumeAlignedAttrEmitter(CodeGenFunction &CGF_, const Decl *FuncDecl)
: AbstractAssumeAlignedAttrEmitter(CGF_, FuncDecl) {
if (!AA)
return;
// It is guaranteed that the alignment/offset are constants.
Alignment = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AA->getAlignment()));
if (Expr *Offset = AA->getOffset()) {
OffsetCI = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(Offset));
if (OffsetCI->isNullValue()) // Canonicalize zero offset to no offset.
OffsetCI = nullptr;
}
}
};
/// Helper data structure to emit `AllocAlignAttr`.
class AllocAlignAttrEmitter final
: public AbstractAssumeAlignedAttrEmitter<AllocAlignAttr> {
public:
AllocAlignAttrEmitter(CodeGenFunction &CGF_, const Decl *FuncDecl,
const CallArgList &CallArgs)
: AbstractAssumeAlignedAttrEmitter(CGF_, FuncDecl) {
if (!AA)
return;
// Alignment may or may not be a constant, and that is okay.
Alignment = CallArgs[AA->getParamIndex().getLLVMIndex()]
.getRValue(CGF)
.getScalarVal();
}
};
} // namespace
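// Return the widest vector width (in bits) found anywhere within the given
// type, looking through array and struct elements; non-vector types yield 0.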
static unsigned getMaxVectorWidth(const llvm::Type *Ty) {
if (auto *VT = dyn_cast<llvm::VectorType>(Ty))
return VT->getPrimitiveSizeInBits().getKnownMinValue();
if (auto *AT = dyn_cast<llvm::ArrayType>(Ty))
return getMaxVectorWidth(AT->getElementType());
unsigned MaxVectorWidth = 0;
if (auto *ST = dyn_cast<llvm::StructType>(Ty))
for (auto *I : ST->elements())
MaxVectorWidth = std::max(MaxVectorWidth, getMaxVectorWidth(I));
return MaxVectorWidth;
}
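// Informal example: for a struct holding an <8 x float> vector and a
// double, this returns 256, the bit width of the widest vector found;
// types containing no vectors yield 0.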
RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
const CGCallee &Callee,
ReturnValueSlot ReturnValue,
const CallArgList &CallArgs,
llvm::CallBase **callOrInvoke, bool IsMustTail,
SourceLocation Loc,
bool IsVirtualFunctionPointerThunk) {
// FIXME: We no longer need the types from CallArgs; lift up and simplify.
assert(Callee.isOrdinary() || Callee.isVirtual());
// Handle struct-return functions by passing a pointer to the
// location that we would like to return into.
QualType RetTy = CallInfo.getReturnType();
const ABIArgInfo &RetAI = CallInfo.getReturnInfo();
llvm::FunctionType *IRFuncTy = getTypes().GetFunctionType(CallInfo);
const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl();
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
// We can only guarantee that a function is called from the correct
// context/function based on the appropriate target attributes, so we
// only check in the case where we have both always_inline and target,
// since otherwise we could be making a conditional call after a check
// for the proper cpu features (and it won't cause code generation
// issues due to function-based code generation).
if (TargetDecl->hasAttr<AlwaysInlineAttr>() &&
(TargetDecl->hasAttr<TargetAttr>() ||
(CurFuncDecl && CurFuncDecl->hasAttr<TargetAttr>())))
checkTargetFeatures(Loc, FD);
}
// Some architectures (such as x86-64) change the ABI based on
// target attributes/features. Give them a chance to diagnose.
CGM.getTargetCodeGenInfo().checkFunctionCallABI(
CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl),
dyn_cast_or_null<FunctionDecl>(TargetDecl), CallArgs, RetTy);
// 1. Set up the arguments.
// If we're using inalloca, insert the allocation after the stack save.
// FIXME: Do this earlier rather than hacking it in here!
RawAddress ArgMemory = RawAddress::invalid();
if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
const llvm::DataLayout &DL = CGM.getDataLayout();
llvm::Instruction *IP = CallArgs.getStackBase();
llvm::AllocaInst *AI;
if (IP) {
IP = IP->getNextNode();
AI = new llvm::AllocaInst(ArgStruct, DL.getAllocaAddrSpace(),
"argmem", IP);
} else {
AI = CreateTempAlloca(ArgStruct, "argmem");
}
auto Align = CallInfo.getArgStructAlignment();
AI->setAlignment(Align.getAsAlign());
AI->setUsedWithInAlloca(true);
assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca());
ArgMemory = RawAddress(AI, ArgStruct, Align);
}
ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), CallInfo);
SmallVector<llvm::Value *, 16> IRCallArgs(IRFunctionArgs.totalIRArgs());
// If the call returns a temporary with struct return, create a temporary
// alloca to hold the result, unless one is given to us.
Address SRetPtr = Address::invalid();
RawAddress SRetAlloca = RawAddress::invalid();
llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) {
SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() +
IRFunctionArgs.getSRetArgNo(),
RetTy, CharUnits::fromQuantity(1));
} else if (!ReturnValue.isNull()) {
SRetPtr = ReturnValue.getAddress();
} else {
SRetPtr = CreateMemTemp(RetTy, "tmp", &SRetAlloca);
if (HaveInsertPoint() && ReturnValue.isUnused()) {
llvm::TypeSize size =
CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy));
UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer());
}
}
if (IRFunctionArgs.hasSRetArg()) {
IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
getAsNaturalPointerTo(SRetPtr, RetTy);
} else if (RetAI.isInAlloca()) {
Address Addr =
Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex());
Builder.CreateStore(getAsNaturalPointerTo(SRetPtr, RetTy), Addr);
}
}
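// Rough sketch of the indirect-return path: for a call to something like
//   struct S { int a[8]; };  S f(void);
// on a target whose ABI returns S indirectly, the caller materializes a
// temporary here and hands its address to the callee, either as the sret
// argument or through the inalloca argument struct chosen above.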
RawAddress swiftErrorTemp = RawAddress::invalid();
Address swiftErrorArg = Address::invalid();
// When passing arguments using temporary allocas, we need to add the
// appropriate lifetime markers. This vector keeps track of all the lifetime
// markers that need to be ended right after the call.
SmallVector<CallLifetimeEnd, 2> CallLifetimeEndAfterCall;
// Translate all of the arguments as necessary to match the IR lowering.
assert(CallInfo.arg_size() == CallArgs.size() &&
"Mismatch between function signature & arguments.");
unsigned ArgNo = 0;
CGFunctionInfo::const_arg_iterator info_it = CallInfo.arg_begin();
for (CallArgList::const_iterator I = CallArgs.begin(), E = CallArgs.end();
I != E; ++I, ++info_it, ++ArgNo) {
const ABIArgInfo &ArgInfo = info_it->info;
// Insert a padding argument to ensure proper alignment.
if (IRFunctionArgs.hasPaddingArg(ArgNo))
IRCallArgs[IRFunctionArgs.getPaddingArgNo(ArgNo)] =
llvm::UndefValue::get(ArgInfo.getPaddingType());
unsigned FirstIRArg, NumIRArgs;
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
bool ArgHasMaybeUndefAttr =
IsArgumentMaybeUndef(TargetDecl, CallInfo.getNumRequiredArgs(), ArgNo);
switch (ArgInfo.getKind()) {
case ABIArgInfo::InAlloca: {
assert(NumIRArgs == 0);
assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
if (I->isAggregate()) {
RawAddress Addr = I->hasLValue()
? I->getKnownLValue().getAddress()
: I->getKnownRValue().getAggregateAddress();
llvm::Instruction *Placeholder =
cast<llvm::Instruction>(Addr.getPointer());
if (!ArgInfo.getInAllocaIndirect()) {
// Replace the placeholder with the appropriate argument slot GEP.
CGBuilderTy::InsertPoint IP = Builder.saveIP();
Builder.SetInsertPoint(Placeholder);
Addr = Builder.CreateStructGEP(ArgMemory,
ArgInfo.getInAllocaFieldIndex());
Builder.restoreIP(IP);
} else {
// For indirect things such as overaligned structs, replace the
// placeholder with a regular aggregate temporary alloca. Store the
// address of this alloca into the struct.
Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp");
Address ArgSlot = Builder.CreateStructGEP(
ArgMemory, ArgInfo.getInAllocaFieldIndex());
Builder.CreateStore(Addr.getPointer(), ArgSlot);
}
deferPlaceholderReplacement(Placeholder, Addr.getPointer());
} else if (ArgInfo.getInAllocaIndirect()) {
// Make a temporary alloca and store the address of it into the argument
// struct.
RawAddress Addr = CreateMemTempWithoutCast(
I->Ty, getContext().getTypeAlignInChars(I->Ty),
"indirect-arg-temp");
I->copyInto(*this, Addr);
Address ArgSlot =
Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
Builder.CreateStore(Addr.getPointer(), ArgSlot);
} else {
// Store the RValue into the argument struct.
Address Addr =
Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
Addr = Addr.withElementType(ConvertTypeForMem(I->Ty));
I->copyInto(*this, Addr);
}
break;
}
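// Sketch of when this case fires: on 32-bit x86 Windows, an object with a
// non-trivial copy constructor passed by value travels through the
// inalloca argument struct, so it is constructed into (or has its address
// stored into) the slot GEP computed above.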
case ABIArgInfo::Indirect:
case ABIArgInfo::IndirectAliased: {
assert(NumIRArgs == 1);
if (I->isAggregate()) {
// We want to avoid creating an unnecessary temporary+copy here;
// however, we need one in three cases:
// 1. If the argument is not byval, and we are required to copy the
// source. (This case doesn't occur on any common architecture.)
// 2. If the argument is byval, RV is not sufficiently aligned, and
// we cannot force it to be sufficiently aligned.
// 3. If the argument is byval, but RV is not located in default
// or alloca address space.
Address Addr = I->hasLValue()
? I->getKnownLValue().getAddress()
: I->getKnownRValue().getAggregateAddress();
CharUnits Align = ArgInfo.getIndirectAlign();
const llvm::DataLayout *TD = &CGM.getDataLayout();
assert((FirstIRArg >= IRFuncTy->getNumParams() ||
IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace() ==
TD->getAllocaAddrSpace()) &&
"indirect argument must be in alloca address space");
bool NeedCopy = false;
if (Addr.getAlignment() < Align &&
llvm::getOrEnforceKnownAlignment(Addr.emitRawPointer(*this),
Align.getAsAlign(),
*TD) < Align.getAsAlign()) {
NeedCopy = true;
} else if (I->hasLValue()) {
auto LV = I->getKnownLValue();
auto AS = LV.getAddressSpace();
bool isByValOrRef =
ArgInfo.isIndirectAliased() || ArgInfo.getIndirectByVal();
if (!isByValOrRef ||
(LV.getAlignment() < getContext().getTypeAlignInChars(I->Ty))) {
NeedCopy = true;
}
if (!getLangOpts().OpenCL) {
if ((isByValOrRef &&
(AS != LangAS::Default &&
AS != CGM.getASTAllocaAddressSpace()))) {
NeedCopy = true;
}
}
// For OpenCL even if RV is located in default or alloca address space
// we don't want to perform address space cast for it.
else if ((isByValOrRef &&
Addr.getType()->getAddressSpace() != IRFuncTy->
getParamType(FirstIRArg)->getPointerAddressSpace())) {
NeedCopy = true;
}
}
if (!NeedCopy) {
// Skip the extra memcpy call.
llvm::Value *V = getAsNaturalPointerTo(Addr, I->Ty);
auto *T = llvm::PointerType::get(
CGM.getLLVMContext(), CGM.getDataLayout().getAllocaAddrSpace());
llvm::Value *Val = getTargetHooks().performAddrSpaceCast(
*this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T,
true);
if (ArgHasMaybeUndefAttr)
Val = Builder.CreateFreeze(Val);
IRCallArgs[FirstIRArg] = Val;
break;
}
}
// For non-aggregate args, and for aggregate args meeting the conditions
// above, we need to create an aligned temporary and copy into it.
RawAddress AI = CreateMemTempWithoutCast(
I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
llvm::Value *Val = getAsNaturalPointerTo(AI, I->Ty);
if (ArgHasMaybeUndefAttr)
Val = Builder.CreateFreeze(Val);
IRCallArgs[FirstIRArg] = Val;
// Emit lifetime markers for the temporary alloca.
llvm::TypeSize ByvalTempElementSize =
CGM.getDataLayout().getTypeAllocSize(AI.getElementType());
llvm::Value *LifetimeSize =
EmitLifetimeStart(ByvalTempElementSize, AI.getPointer());
// Add cleanup code to emit the end lifetime marker after the call.
if (LifetimeSize) // In case we disabled lifetime markers.
CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize);
// Generate the copy.
I->copyInto(*this, AI);
break;
}
case ABIArgInfo::Ignore:
assert(NumIRArgs == 0);
break;
case ABIArgInfo::Extend:
case ABIArgInfo::Direct: {
if (!isa<llvm::StructType>(ArgInfo.getCoerceToType()) &&
ArgInfo.getCoerceToType() == ConvertType(info_it->type) &&
ArgInfo.getDirectOffset() == 0) {
assert(NumIRArgs == 1);
llvm::Value *V;
if (!I->isAggregate())
V = I->getKnownRValue().getScalarVal();
else
V = Builder.CreateLoad(
I->hasLValue() ? I->getKnownLValue().getAddress()
: I->getKnownRValue().getAggregateAddress());
// Implement swifterror by copying into a new swifterror argument.
// We'll write back in the normal path out of the call.
if (CallInfo.getExtParameterInfo(ArgNo).getABI()
== ParameterABI::SwiftErrorResult) {
assert(!swiftErrorTemp.isValid() && "multiple swifterror args");
QualType pointeeTy = I->Ty->getPointeeType();
swiftErrorArg = makeNaturalAddressForPointer(
V, pointeeTy, getContext().getTypeAlignInChars(pointeeTy));
swiftErrorTemp =
CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp");
V = swiftErrorTemp.getPointer();
cast<llvm::AllocaInst>(V)->setSwiftError(true);
llvm::Value *errorValue = Builder.CreateLoad(swiftErrorArg);
Builder.CreateStore(errorValue, swiftErrorTemp);
}
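// Informal summary of the swifterror protocol as handled above: the
// caller-side error value is spilled into a fresh alloca flagged with
// setSwiftError(true), that alloca is what the callee sees, and the
// possibly-updated value is copied back after the call (see the
// swifterror writeback further down).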
// We might have to widen integers, but we should never truncate.
if (ArgInfo.getCoerceToType() != V->getType() &&
V->getType()->isIntegerTy())
V = Builder.CreateZExt(V, ArgInfo.getCoerceToType());
// If the argument doesn't match, perform a bitcast to coerce it. This
// can happen due to trivial type mismatches.
if (FirstIRArg < IRFuncTy->getNumParams() &&
V->getType() != IRFuncTy->getParamType(FirstIRArg))
V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg));
if (ArgHasMaybeUndefAttr)
V = Builder.CreateFreeze(V);
IRCallArgs[FirstIRArg] = V;
break;
}
llvm::StructType *STy =
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
if (STy && ArgInfo.isDirect() && !ArgInfo.getCanBeFlattened()) {
llvm::Type *SrcTy = ConvertTypeForMem(I->Ty);
[[maybe_unused]] llvm::TypeSize SrcTypeSize =
CGM.getDataLayout().getTypeAllocSize(SrcTy);
[[maybe_unused]] llvm::TypeSize DstTypeSize =
CGM.getDataLayout().getTypeAllocSize(STy);
if (STy->containsHomogeneousScalableVectorTypes()) {
assert(SrcTypeSize == DstTypeSize &&
"Only allow non-fractional movement of structure with "
"homogeneous scalable vector type");
IRCallArgs[FirstIRArg] = I->getKnownRValue().getScalarVal();
break;
}
}
// FIXME: Avoid the conversion through memory if possible.
Address Src = Address::invalid();
if (!I->isAggregate()) {
Src = CreateMemTemp(I->Ty, "coerce");
I->copyInto(*this, Src);
} else {
Src = I->hasLValue() ? I->getKnownLValue().getAddress()
: I->getKnownRValue().getAggregateAddress();
}
// If the value is offset in memory, apply the offset now.
Src = emitAddressAtOffset(*this, Src, ArgInfo);
// Fast-isel and the optimizer generally like scalar values better than
// FCAs, so we flatten them if this is safe to do for this argument.
if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
llvm::Type *SrcTy = Src.getElementType();
llvm::TypeSize SrcTypeSize =
CGM.getDataLayout().getTypeAllocSize(SrcTy);
llvm::TypeSize DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
if (SrcTypeSize.isScalable()) {
assert(STy->containsHomogeneousScalableVectorTypes() &&
"ABI only supports structure with homogeneous scalable vector "
"type");
assert(SrcTypeSize == DstTypeSize &&
"Only allow non-fractional movement of structure with "
"homogeneous scalable vector type");
assert(NumIRArgs == STy->getNumElements());
llvm::Value *StoredStructValue =
Builder.CreateLoad(Src, Src.getName() + ".tuple");
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
llvm::Value *Extract = Builder.CreateExtractValue(
StoredStructValue, i, Src.getName() + ".extract" + Twine(i));
IRCallArgs[FirstIRArg + i] = Extract;
}
} else {
uint64_t SrcSize = SrcTypeSize.getFixedValue();
uint64_t DstSize = DstTypeSize.getFixedValue();
// If the source type is smaller than the destination type of the
// coerce-to logic, copy the source value into a temp alloca the size
// of the destination type to allow loading all of it. The bits past
// the source value are left undef.
if (SrcSize < DstSize) {
Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(),
Src.getName() + ".coerce");
Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
Src = TempAlloca;
} else {
Src = Src.withElementType(STy);
}
assert(NumIRArgs == STy->getNumElements());
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Address EltPtr = Builder.CreateStructGEP(Src, i);
llvm::Value *LI = Builder.CreateLoad(EltPtr);
if (ArgHasMaybeUndefAttr)
LI = Builder.CreateFreeze(LI);
IRCallArgs[FirstIRArg + i] = LI;
}
}
} else {
// In the simple case, just pass the coerced loaded value.
assert(NumIRArgs == 1);
llvm::Value *Load =
CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this);
if (CallInfo.isCmseNSCall()) {
// For certain parameter types, clear padding bits, as they may reveal
// sensitive information.
// Small struct/union types are passed as integer arrays.
auto *ATy = dyn_cast<llvm::ArrayType>(Load->getType());
if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
}
if (ArgHasMaybeUndefAttr)
Load = Builder.CreateFreeze(Load);
IRCallArgs[FirstIRArg] = Load;
}
break;
}
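// Condensed example of the flattening path above: if a struct coerces to
// the IR type { i32, i32 } and can be flattened, its two fields are loaded
// separately and passed as two scalar IR arguments instead of one
// first-class aggregate value.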
case ABIArgInfo::CoerceAndExpand: {
auto coercionType = ArgInfo.getCoerceAndExpandType();
auto layout = CGM.getDataLayout().getStructLayout(coercionType);
llvm::Value *tempSize = nullptr;
Address addr = Address::invalid();
RawAddress AllocaAddr = RawAddress::invalid();
if (I->isAggregate()) {
addr = I->hasLValue() ? I->getKnownLValue().getAddress()
: I->getKnownRValue().getAggregateAddress();
} else {
RValue RV = I->getKnownRValue();
assert(RV.isScalar()); // complex should always just be direct
llvm::Type *scalarType = RV.getScalarVal()->getType();
auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType);
auto scalarAlign = CGM.getDataLayout().getPrefTypeAlign(scalarType);
// Materialize to a temporary.
addr = CreateTempAlloca(
RV.getScalarVal()->getType(),
CharUnits::fromQuantity(std::max(layout->getAlignment(), scalarAlign)),
"tmp",
/*ArraySize=*/nullptr, &AllocaAddr);
tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer());
Builder.CreateStore(RV.getScalarVal(), addr);
}
addr = addr.withElementType(coercionType);
unsigned IRArgPos = FirstIRArg;
for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
llvm::Type *eltType = coercionType->getElementType(i);
if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue;
Address eltAddr = Builder.CreateStructGEP(addr, i);
llvm::Value *elt = Builder.CreateLoad(eltAddr);
if (ArgHasMaybeUndefAttr)
elt = Builder.CreateFreeze(elt);
IRCallArgs[IRArgPos++] = elt;
}
assert(IRArgPos == FirstIRArg + NumIRArgs);
if (tempSize) {
EmitLifetimeEnd(tempSize, AllocaAddr.getPointer());
}
break;
}
case ABIArgInfo::Expand: {
unsigned IRArgPos = FirstIRArg;
ExpandTypeToArgs(I->Ty, *I, IRFuncTy, IRCallArgs, IRArgPos);
assert(IRArgPos == FirstIRArg + NumIRArgs);
break;
}
}
}
const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this);
llvm::Value *CalleePtr = ConcreteCallee.getFunctionPointer();
// If we're using inalloca, set up that argument.
if (ArgMemory.isValid()) {
llvm::Value *Arg = ArgMemory.getPointer();
assert(IRFunctionArgs.hasInallocaArg());
IRCallArgs[IRFunctionArgs.getInallocaArgNo()] = Arg;
}
// 2. Prepare the function pointer.
// If the callee is a bitcast of a non-variadic function to have a
// variadic function pointer type, check to see if we can remove the
// bitcast. This comes up with unprototyped functions.
//
// This makes the IR nicer, but more importantly it ensures that we
// can inline the function at -O0 if it is marked always_inline.
auto simplifyVariadicCallee = [](llvm::FunctionType *CalleeFT,
llvm::Value *Ptr) -> llvm::Function * {
if (!CalleeFT->isVarArg())
return nullptr;
// Get underlying value if it's a bitcast
if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Ptr)) {
if (CE->getOpcode() == llvm::Instruction::BitCast)
Ptr = CE->getOperand(0);
}
llvm::Function *OrigFn = dyn_cast<llvm::Function>(Ptr);
if (!OrigFn)
return nullptr;
llvm::FunctionType *OrigFT = OrigFn->getFunctionType();
// If the original type is variadic, or if any of the component types
// disagree, we cannot remove the cast.
if (OrigFT->isVarArg() ||
OrigFT->getNumParams() != CalleeFT->getNumParams() ||
OrigFT->getReturnType() != CalleeFT->getReturnType())
return nullptr;
for (unsigned i = 0, e = OrigFT->getNumParams(); i != e; ++i)
if (OrigFT->getParamType(i) != CalleeFT->getParamType(i))
return nullptr;
return OrigFn;
};
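// Hypothetical case this lambda handles: a C call through an unprototyped
// declaration such as
//   void f();  ...  f(42);
// goes through a variadic-looking function pointer; if the underlying
// llvm::Function is the non-variadic `void f(i32)`, the cast is stripped
// so the direct callee remains inlinable at -O0.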
if (llvm::Function *OrigFn = simplifyVariadicCallee(IRFuncTy, CalleePtr)) {
CalleePtr = OrigFn;
IRFuncTy = OrigFn->getFunctionType();
}
// 3. Perform the actual call.
// Deactivate any cleanups that we're supposed to do immediately before
// the call.
if (!CallArgs.getCleanupsToDeactivate().empty())
deactivateArgCleanupsBeforeCall(*this, CallArgs);
// Assert that the arguments we computed match up. The IR verifier
// will catch this, but this is a common enough source of problems
// during IRGen changes that it's way better for debugging to catch
// it ourselves here.
#ifndef NDEBUG
assert(IRCallArgs.size() == IRFuncTy->getNumParams() || IRFuncTy->isVarArg());
for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
// The inalloca argument can have a different type.
if (IRFunctionArgs.hasInallocaArg() &&
i == IRFunctionArgs.getInallocaArgNo())
continue;
if (i < IRFuncTy->getNumParams())
assert(IRCallArgs[i]->getType() == IRFuncTy->getParamType(i));
}
#endif
// Update the largest vector width if any arguments have vector types.
for (unsigned i = 0; i < IRCallArgs.size(); ++i)
LargestVectorWidth = std::max(LargestVectorWidth,
getMaxVectorWidth(IRCallArgs[i]->getType()));
// Compute the calling convention and attributes.
unsigned CallingConv;
llvm::AttributeList Attrs;
CGM.ConstructAttributeList(CalleePtr->getName(), CallInfo,
Callee.getAbstractInfo(), Attrs, CallingConv,
/*AttrOnCallSite=*/true,
/*IsThunk=*/false);
if (CallingConv == llvm::CallingConv::X86_VectorCall &&
getTarget().getTriple().isWindowsArm64EC()) {
CGM.Error(Loc, "__vectorcall calling convention is not currently "
"supported");
}
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) {
if (FD->hasAttr<StrictFPAttr>())
// All calls within a strictfp function are marked strictfp
Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP);
// If -ffast-math is enabled and the function is guarded by
// '__attribute__((optnone))', adjust the memory attribute so the backend
// emits the library call instead of the intrinsic.
if (FD->hasAttr<OptimizeNoneAttr>() && getLangOpts().FastMath)
CGM.AdjustMemoryAttribute(CalleePtr->getName(), Callee.getAbstractInfo(),
Attrs);
}
// Add the call-site nomerge attribute if it exists.
if (InNoMergeAttributedStmt)
Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoMerge);
// Add the call-site noinline attribute if it exists.
if (InNoInlineAttributedStmt)
Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline);
// Add the call-site always_inline attribute if it exists.
if (InAlwaysInlineAttributedStmt)
Attrs =
Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
// Apply some call-site-specific attributes.
// TODO: work this into building the attribute set.
// Apply always_inline to all calls within flatten functions.
// FIXME: should this really take priority over __try, below?
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
!InNoInlineAttributedStmt &&
!(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) {
Attrs =
Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
}
// Disable inlining inside SEH __try blocks.
if (isSEHTryScope()) {
Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline);
}
// Decide whether to use a call or an invoke.
bool CannotThrow;
if (currentFunctionUsesSEHTry()) {
// SEH cares about asynchronous exceptions, so everything can "throw."
CannotThrow = false;
} else if (isCleanupPadScope() &&
EHPersonality::get(*this).isMSVCXXPersonality()) {
// The MSVC++ personality will implicitly terminate the program if an
// exception is thrown during a cleanup outside of a try/catch.
// We don't need to model anything in IR to get this behavior.
CannotThrow = true;
} else {
// Otherwise, nounwind call sites will never throw.
CannotThrow = Attrs.hasFnAttr(llvm::Attribute::NoUnwind);
if (auto *FPtr = dyn_cast<llvm::Function>(CalleePtr))
if (FPtr->hasFnAttribute(llvm::Attribute::NoUnwind))
CannotThrow = true;
}
// If we made a temporary, be sure to clean up after ourselves. Note that we
// can't depend on being inside of an ExprWithCleanups, so we need to manually
// pop this cleanup later on. Being eager about this is OK, since this
// temporary is 'invisible' outside of the callee.
if (UnusedReturnSizePtr)
pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca,
UnusedReturnSizePtr);
llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();
SmallVector<llvm::OperandBundleDef, 1> BundleList =
getBundlesForFunclet(CalleePtr);
if (SanOpts.has(SanitizerKind::KCFI) &&
!isa_and_nonnull<FunctionDecl>(TargetDecl))
EmitKCFIOperandBundle(ConcreteCallee, BundleList);
// Add the pointer-authentication bundle.
EmitPointerAuthOperandBundle(ConcreteCallee.getPointerAuthInfo(), BundleList);
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl))
if (FD->hasAttr<StrictFPAttr>())
// All calls within a strictfp function are marked strictfp
Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP);
AssumeAlignedAttrEmitter AssumeAlignedAttrEmitter(*this, TargetDecl);
Attrs = AssumeAlignedAttrEmitter.TryEmitAsCallSiteAttribute(Attrs);
AllocAlignAttrEmitter AllocAlignAttrEmitter(*this, TargetDecl, CallArgs);
Attrs = AllocAlignAttrEmitter.TryEmitAsCallSiteAttribute(Attrs);
// Emit the actual call/invoke instruction.
llvm::CallBase *CI;
if (!InvokeDest) {
CI = Builder.CreateCall(IRFuncTy, CalleePtr, IRCallArgs, BundleList);
} else {
llvm::BasicBlock *Cont = createBasicBlock("invoke.cont");
CI = Builder.CreateInvoke(IRFuncTy, CalleePtr, Cont, InvokeDest, IRCallArgs,
BundleList);
EmitBlock(Cont);
}
if (CI->getCalledFunction() && CI->getCalledFunction()->hasName() &&
CI->getCalledFunction()->getName().starts_with("_Z4sqrt")) {
SetSqrtFPAccuracy(CI);
}
if (callOrInvoke)
*callOrInvoke = CI;
// If this is within a function that has the guard(nocf) attribute and is an
// indirect call, add the "guard_nocf" attribute to this call to indicate that
// Control Flow Guard checks should not be added, even if the call is inlined.
if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) {
if (const auto *A = FD->getAttr<CFGuardAttr>()) {
if (A->getGuard() == CFGuardAttr::GuardArg::nocf && !CI->getCalledFunction())
Attrs = Attrs.addFnAttribute(getLLVMContext(), "guard_nocf");
}
}
// Apply the attributes and calling convention.
CI->setAttributes(Attrs);
CI->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
// Apply various metadata.
if (!CI->getType()->isVoidTy())
CI->setName("call");
if (CGM.shouldEmitConvergenceTokens() && CI->isConvergent())
CI = addControlledConvergenceToken(CI);
// Update largest vector width from the return type.
LargestVectorWidth =
std::max(LargestVectorWidth, getMaxVectorWidth(CI->getType()));
// Insert instrumentation or attach profile metadata at indirect call sites.
// For more details, see the comment before the definition of
// IPVK_IndirectCallTarget in InstrProfData.inc.
if (!CI->getCalledFunction())
PGO.valueProfile(Builder, llvm::IPVK_IndirectCallTarget,
CI, CalleePtr);
// In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC
// optimizer it can aggressively ignore unwind edges.
if (CGM.getLangOpts().ObjCAutoRefCount)
AddObjCARCExceptionMetadata(CI);
// Set tail call kind if necessary.
if (llvm::CallInst *Call = dyn_cast<llvm::CallInst>(CI)) {
if (TargetDecl && TargetDecl->hasAttr<NotTailCalledAttr>())
Call->setTailCallKind(llvm::CallInst::TCK_NoTail);
else if (IsMustTail) {
if (getTarget().getTriple().isPPC()) {
if (getTarget().getTriple().isOSAIX())
CGM.getDiags().Report(Loc, diag::err_aix_musttail_unsupported);
else if (!getTarget().hasFeature("pcrelative-memops")) {
if (getTarget().hasFeature("longcall"))
CGM.getDiags().Report(Loc, diag::err_ppc_impossible_musttail) << 0;
else if (Call->isIndirectCall())
CGM.getDiags().Report(Loc, diag::err_ppc_impossible_musttail) << 1;
else if (isa_and_nonnull<FunctionDecl>(TargetDecl)) {
if (!cast<FunctionDecl>(TargetDecl)->isDefined())
// The undefined callee may be a forward declaration. Without
// knowing all symbols in the module, we can't tell whether the
// symbol is defined. Collect all these symbols for later diagnosis.
CGM.addUndefinedGlobalForTailCall(
{cast<FunctionDecl>(TargetDecl), Loc});
else {
llvm::GlobalValue::LinkageTypes Linkage = CGM.getFunctionLinkage(
GlobalDecl(cast<FunctionDecl>(TargetDecl)));
if (llvm::GlobalValue::isWeakForLinker(Linkage) ||
llvm::GlobalValue::isDiscardableIfUnused(Linkage))
CGM.getDiags().Report(Loc, diag::err_ppc_impossible_musttail)
<< 2;
}
}
}
}
Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
}
}
// Add metadata for calls to MSAllocator functions
if (getDebugInfo() && TargetDecl &&
TargetDecl->hasAttr<MSAllocatorAttr>())
getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy->getPointeeType(), Loc);
// Add metadata if calling an __attribute__((error(""))) or warning fn.
if (TargetDecl && TargetDecl->hasAttr<ErrorAttr>()) {
llvm::ConstantInt *Line =
llvm::ConstantInt::get(Int64Ty, Loc.getRawEncoding());
llvm::ConstantAsMetadata *MD = llvm::ConstantAsMetadata::get(Line);
llvm::MDTuple *MDT = llvm::MDNode::get(getLLVMContext(), {MD});
CI->setMetadata("srcloc", MDT);
}
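// Minimal illustration: a call to a function declared
//   __attribute__((error("never call this"))) void bad(void);
// gets !srcloc metadata carrying the encoded SourceLocation, letting the
// backend point its delayed diagnostic at this exact call site.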
// 4. Finish the call.
// If the call doesn't return, finish the basic block and clear the
// insertion point; this allows the rest of IRGen to discard
// unreachable code.
if (CI->doesNotReturn()) {
if (UnusedReturnSizePtr)
PopCleanupBlock();
// Strip away the noreturn attribute to better diagnose unreachable UB.
if (SanOpts.has(SanitizerKind::Unreachable)) {
// Also remove from function since CallBase::hasFnAttr additionally checks
// attributes of the called function.
if (auto *F = CI->getCalledFunction())
F->removeFnAttr(llvm::Attribute::NoReturn);
CI->removeFnAttr(llvm::Attribute::NoReturn);
// Avoid incompatibility with ASan which relies on the `noreturn`
// attribute to insert handler calls.
if (SanOpts.hasOneOf(SanitizerKind::Address |
SanitizerKind::KernelAddress)) {
SanitizerScope SanScope(this);
llvm::IRBuilder<>::InsertPointGuard IPGuard(Builder);
Builder.SetInsertPoint(CI);
auto *FnType = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
llvm::FunctionCallee Fn =
CGM.CreateRuntimeFunction(FnType, "__asan_handle_no_return");
EmitNounwindRuntimeCall(Fn);
}
}
EmitUnreachable(Loc);
Builder.ClearInsertionPoint();
// FIXME: For now, emit a dummy basic block because expr emitters in
// general are not ready to handle emitting expressions at unreachable
// points.
EnsureInsertPoint();
// Return a reasonable RValue.
return GetUndefRValue(RetTy);
}
// If this is a musttail call, return immediately. We do not branch to the
// epilogue in this case.
if (IsMustTail) {
for (auto it = EHStack.find(CurrentCleanupScopeDepth); it != EHStack.end();
++it) {
EHCleanupScope *Cleanup = dyn_cast<EHCleanupScope>(&*it);
if (!(Cleanup && Cleanup->getCleanup()->isRedundantBeforeReturn()))
CGM.ErrorUnsupported(MustTailCall, "tail call skipping over cleanups");
}
if (CI->getType()->isVoidTy())
Builder.CreateRetVoid();
else
Builder.CreateRet(CI);
Builder.ClearInsertionPoint();
EnsureInsertPoint();
return GetUndefRValue(RetTy);
}
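// Typical trigger for this block (sketch): a statement of the form
//   [[clang::musttail]] return callee(args...);
// lands here; the call was marked TCK_MustTail above and we return
// directly rather than branching to the function epilogue.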
// Perform the swifterror writeback.
if (swiftErrorTemp.isValid()) {
llvm::Value *errorResult = Builder.CreateLoad(swiftErrorTemp);
Builder.CreateStore(errorResult, swiftErrorArg);
}
// Emit any call-associated writebacks immediately. Arguably this
// should happen after any return-value munging.
if (CallArgs.hasWritebacks())
emitWritebacks(*this, CallArgs);
// The stack cleanup for inalloca arguments has to run out of the normal
// lexical order, so deactivate it and run it manually here.
CallArgs.freeArgumentMemory(*this);
// Extract the return value.
RValue Ret;
// If the current function is a virtual function pointer thunk, avoid copying
// the return value of the musttail call to a temporary.
if (IsVirtualFunctionPointerThunk) {
Ret = RValue::get(CI);
} else {
Ret = [&] {
switch (RetAI.getKind()) {
case ABIArgInfo::CoerceAndExpand: {
auto coercionType = RetAI.getCoerceAndExpandType();
Address addr = SRetPtr.withElementType(coercionType);
assert(CI->getType() == RetAI.getUnpaddedCoerceAndExpandType());
bool requiresExtract = isa<llvm::StructType>(CI->getType());
unsigned unpaddedIndex = 0;
for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
llvm::Type *eltType = coercionType->getElementType(i);
if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType))
continue;
Address eltAddr = Builder.CreateStructGEP(addr, i);
llvm::Value *elt = CI;
if (requiresExtract)
elt = Builder.CreateExtractValue(elt, unpaddedIndex++);
else
assert(unpaddedIndex == 0);
Builder.CreateStore(elt, eltAddr);
}
[[fallthrough]];
}
case ABIArgInfo::InAlloca:
case ABIArgInfo::Indirect: {
RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation());
if (UnusedReturnSizePtr)
PopCleanupBlock();
return ret;
}
case ABIArgInfo::Ignore:
// If we are ignoring the result of a call that produced one, make sure
// to construct the appropriate return value for our caller.
return GetUndefRValue(RetTy);
case ABIArgInfo::Extend:
case ABIArgInfo::Direct: {
llvm::Type *RetIRTy = ConvertType(RetTy);
if (RetAI.getCoerceToType() == RetIRTy &&
RetAI.getDirectOffset() == 0) {
switch (getEvaluationKind(RetTy)) {
case TEK_Complex: {
llvm::Value *Real = Builder.CreateExtractValue(CI, 0);
llvm::Value *Imag = Builder.CreateExtractValue(CI, 1);
return RValue::getComplex(std::make_pair(Real, Imag));
}
- case TEK_Aggregate: {
- Address DestPtr = ReturnValue.getAddress();
- bool DestIsVolatile = ReturnValue.isVolatile();
-
- if (!DestPtr.isValid()) {
- DestPtr = CreateMemTemp(RetTy, "agg.tmp");
- DestIsVolatile = false;
- }
- EmitAggregateStore(CI, DestPtr, DestIsVolatile);
- return RValue::getAggregate(DestPtr);
- }
+ case TEK_Aggregate:
+ break;
case TEK_Scalar: {
// If the argument doesn't match, perform a bitcast to coerce it.
// This can happen due to trivial type mismatches.
llvm::Value *V = CI;
if (V->getType() != RetIRTy)
V = Builder.CreateBitCast(V, RetIRTy);
return RValue::get(V);
}
}
- llvm_unreachable("bad evaluation kind");
}
// If coercing a fixed vector from a scalable vector for ABI
// compatibility, and the types match, use the llvm.vector.extract
// intrinsic to perform the conversion.
if (auto *FixedDstTy = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
llvm::Value *V = CI;
if (auto *ScalableSrcTy =
dyn_cast<llvm::ScalableVectorType>(V->getType())) {
if (FixedDstTy->getElementType() ==
ScalableSrcTy->getElementType()) {
llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
V = Builder.CreateExtractVector(FixedDstTy, V, Zero,
"cast.fixed");
return RValue::get(V);
}
}
}
Address DestPtr = ReturnValue.getValue();
bool DestIsVolatile = ReturnValue.isVolatile();
+ uint64_t DestSize =
+ getContext().getTypeInfoDataSizeInChars(RetTy).Width.getQuantity();
if (!DestPtr.isValid()) {
DestPtr = CreateMemTemp(RetTy, "coerce");
DestIsVolatile = false;
+ DestSize = getContext().getTypeSizeInChars(RetTy).getQuantity();
}
// An empty record can overlap other data (if declared with
// no_unique_address); omit the store for such types, as there is no
// actual data to store.
if (!isEmptyRecord(getContext(), RetTy, true)) {
// If the value is offset in memory, apply the offset now.
Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
- CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+ CreateCoercedStore(
+ CI, StorePtr,
+ llvm::TypeSize::getFixed(DestSize - RetAI.getDirectOffset()),
+ DestIsVolatile);
}
return convertTempToRValue(DestPtr, RetTy, SourceLocation());
}
case ABIArgInfo::Expand:
case ABIArgInfo::IndirectAliased:
llvm_unreachable("Invalid ABI kind for return argument");
}
llvm_unreachable("Unhandled ABIArgInfo::Kind");
}();
}
// Emit the assume_aligned check on the return value.
if (Ret.isScalar() && TargetDecl) {
AssumeAlignedAttrEmitter.EmitAsAnAssumption(Loc, RetTy, Ret);
AllocAlignAttrEmitter.EmitAsAnAssumption(Loc, RetTy, Ret);
}
// Explicitly call CallLifetimeEnd::Emit just to re-use the code even though
// we can't use the full cleanup mechanism.
for (CallLifetimeEnd &LifetimeEnd : CallLifetimeEndAfterCall)
LifetimeEnd.Emit(*this, /*Flags=*/{});
if (!ReturnValue.isExternallyDestructed() &&
RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct)
pushDestroy(QualType::DK_nontrivial_c_struct, Ret.getAggregateAddress(),
RetTy);
return Ret;
}
CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const {
if (isVirtual()) {
const CallExpr *CE = getVirtualCallExpr();
return CGF.CGM.getCXXABI().getVirtualFunctionPointer(
CGF, getVirtualMethodDecl(), getThisAddress(), getVirtualFunctionType(),
CE ? CE->getBeginLoc() : SourceLocation());
}
return *this;
}
/* VarArg handling */
RValue CodeGenFunction::EmitVAArg(VAArgExpr *VE, Address &VAListAddr,
AggValueSlot Slot) {
VAListAddr = VE->isMicrosoftABI() ? EmitMSVAListRef(VE->getSubExpr())
: EmitVAListRef(VE->getSubExpr());
QualType Ty = VE->getType();
if (VE->isMicrosoftABI())
return CGM.getTypes().getABIInfo().EmitMSVAArg(*this, VAListAddr, Ty, Slot);
return CGM.getTypes().getABIInfo().EmitVAArg(*this, VAListAddr, Ty, Slot);
}
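// Informal example: `va_arg(ap, int)` in source reaches this function and
// is delegated to the target's ABIInfo, which knows the va_list layout for
// the platform (with a separate hook for the Microsoft ABI).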
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp
index c3c10e73ff05..d9f44f4be617 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGExprAgg.cpp
@@ -1,2212 +1,2217 @@
//===--- CGExprAgg.cpp - Emit LLVM Code from Aggregate Expressions --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Aggregate Expr nodes as LLVM code.
//
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
#include "EHScopeStack.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/StmtVisitor.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
using namespace clang;
using namespace CodeGen;
//===----------------------------------------------------------------------===//
// Aggregate Expression Emitter
//===----------------------------------------------------------------------===//
namespace llvm {
extern cl::opt<bool> EnableSingleByteCoverage;
} // namespace llvm
namespace {
class AggExprEmitter : public StmtVisitor<AggExprEmitter> {
CodeGenFunction &CGF;
CGBuilderTy &Builder;
AggValueSlot Dest;
bool IsResultUnused;
AggValueSlot EnsureSlot(QualType T) {
if (!Dest.isIgnored()) return Dest;
return CGF.CreateAggTemp(T, "agg.tmp.ensured");
}
void EnsureDest(QualType T) {
if (!Dest.isIgnored()) return;
Dest = CGF.CreateAggTemp(T, "agg.tmp.ensured");
}
// Calls `Fn` with a valid return value slot, potentially creating a temporary
// to do so. If a temporary is created, an appropriate copy into `Dest` will
// be emitted, as will lifetime markers.
//
// The given function should take a ReturnValueSlot, and return an RValue that
// points to said slot.
void withReturnValueSlot(const Expr *E,
llvm::function_ref<RValue(ReturnValueSlot)> Fn);
public:
AggExprEmitter(CodeGenFunction &cgf, AggValueSlot Dest, bool IsResultUnused)
: CGF(cgf), Builder(CGF.Builder), Dest(Dest),
IsResultUnused(IsResultUnused) { }
//===--------------------------------------------------------------------===//
// Utilities
//===--------------------------------------------------------------------===//
/// EmitAggLoadOfLValue - Given an expression with aggregate type that
/// represents an lvalue, this method emits the address of the lvalue,
/// then loads the result into DestPtr.
void EmitAggLoadOfLValue(const Expr *E);
/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
/// SrcIsRValue is true if source comes from an RValue.
void EmitFinalDestCopy(QualType type, const LValue &src,
CodeGenFunction::ExprValueKind SrcValueKind =
CodeGenFunction::EVK_NonRValue);
void EmitFinalDestCopy(QualType type, RValue src);
void EmitCopy(QualType type, const AggValueSlot &dest,
const AggValueSlot &src);
void EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, QualType ArrayQTy,
Expr *ExprToVisit, ArrayRef<Expr *> Args,
Expr *ArrayFiller);
AggValueSlot::NeedsGCBarriers_t needsGC(QualType T) {
if (CGF.getLangOpts().getGC() && TypeRequiresGCollection(T))
return AggValueSlot::NeedsGCBarriers;
return AggValueSlot::DoesNotNeedGCBarriers;
}
bool TypeRequiresGCollection(QualType T);
//===--------------------------------------------------------------------===//
// Visitor Methods
//===--------------------------------------------------------------------===//
void Visit(Expr *E) {
ApplyDebugLocation DL(CGF, E);
StmtVisitor<AggExprEmitter>::Visit(E);
}
void VisitStmt(Stmt *S) {
CGF.ErrorUnsupported(S, "aggregate expression");
}
void VisitParenExpr(ParenExpr *PE) { Visit(PE->getSubExpr()); }
void VisitGenericSelectionExpr(GenericSelectionExpr *GE) {
Visit(GE->getResultExpr());
}
void VisitCoawaitExpr(CoawaitExpr *E) {
CGF.EmitCoawaitExpr(*E, Dest, IsResultUnused);
}
void VisitCoyieldExpr(CoyieldExpr *E) {
CGF.EmitCoyieldExpr(*E, Dest, IsResultUnused);
}
void VisitUnaryCoawait(UnaryOperator *E) { Visit(E->getSubExpr()); }
void VisitUnaryExtension(UnaryOperator *E) { Visit(E->getSubExpr()); }
void VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *E) {
return Visit(E->getReplacement());
}
void VisitConstantExpr(ConstantExpr *E) {
EnsureDest(E->getType());
if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
- Address StoreDest = Dest.getAddress();
- // The emitted value is guaranteed to have the same size as the
- // destination but can have a different type. Just do a bitcast in this
- // case to avoid incorrect GEPs.
- if (Result->getType() != StoreDest.getType())
- StoreDest = StoreDest.withElementType(Result->getType());
-
- CGF.EmitAggregateStore(Result, StoreDest,
- E->getType().isVolatileQualified());
+ CGF.CreateCoercedStore(
+ Result, Dest.getAddress(),
+ llvm::TypeSize::getFixed(
+ Dest.getPreferredSize(CGF.getContext(), E->getType())
+ .getQuantity()),
+ E->getType().isVolatileQualified());
return;
}
return Visit(E->getSubExpr());
}
// l-values.
void VisitDeclRefExpr(DeclRefExpr *E) { EmitAggLoadOfLValue(E); }
void VisitMemberExpr(MemberExpr *ME) { EmitAggLoadOfLValue(ME); }
void VisitUnaryDeref(UnaryOperator *E) { EmitAggLoadOfLValue(E); }
void VisitStringLiteral(StringLiteral *E) { EmitAggLoadOfLValue(E); }
void VisitCompoundLiteralExpr(CompoundLiteralExpr *E);
void VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
EmitAggLoadOfLValue(E);
}
void VisitPredefinedExpr(const PredefinedExpr *E) {
EmitAggLoadOfLValue(E);
}
// Operators.
void VisitCastExpr(CastExpr *E);
void VisitCallExpr(const CallExpr *E);
void VisitStmtExpr(const StmtExpr *E);
void VisitBinaryOperator(const BinaryOperator *BO);
void VisitPointerToDataMemberBinaryOperator(const BinaryOperator *BO);
void VisitBinAssign(const BinaryOperator *E);
void VisitBinComma(const BinaryOperator *E);
void VisitBinCmp(const BinaryOperator *E);
void VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) {
Visit(E->getSemanticForm());
}
void VisitObjCMessageExpr(ObjCMessageExpr *E);
void VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) {
EmitAggLoadOfLValue(E);
}
void VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E);
void VisitAbstractConditionalOperator(const AbstractConditionalOperator *CO);
void VisitChooseExpr(const ChooseExpr *CE);
void VisitInitListExpr(InitListExpr *E);
void VisitCXXParenListOrInitListExpr(Expr *ExprToVisit, ArrayRef<Expr *> Args,
FieldDecl *InitializedFieldInUnion,
Expr *ArrayFiller);
void VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
llvm::Value *outerBegin = nullptr);
void VisitImplicitValueInitExpr(ImplicitValueInitExpr *E);
void VisitNoInitExpr(NoInitExpr *E) { } // Do nothing.
void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) {
CodeGenFunction::CXXDefaultArgExprScope Scope(CGF, DAE);
Visit(DAE->getExpr());
}
void VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) {
CodeGenFunction::CXXDefaultInitExprScope Scope(CGF, DIE);
Visit(DIE->getExpr());
}
void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E);
void VisitCXXConstructExpr(const CXXConstructExpr *E);
void VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E);
void VisitLambdaExpr(LambdaExpr *E);
void VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E);
void VisitExprWithCleanups(ExprWithCleanups *E);
void VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E);
void VisitCXXTypeidExpr(CXXTypeidExpr *E) { EmitAggLoadOfLValue(E); }
void VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E);
void VisitOpaqueValueExpr(OpaqueValueExpr *E);
void VisitPseudoObjectExpr(PseudoObjectExpr *E) {
if (E->isGLValue()) {
LValue LV = CGF.EmitPseudoObjectLValue(E);
return EmitFinalDestCopy(E->getType(), LV);
}
AggValueSlot Slot = EnsureSlot(E->getType());
bool NeedsDestruction =
!Slot.isExternallyDestructed() &&
E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct;
if (NeedsDestruction)
Slot.setExternallyDestructed();
CGF.EmitPseudoObjectRValue(E, Slot);
if (NeedsDestruction)
CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Slot.getAddress(),
E->getType());
}
void VisitVAArgExpr(VAArgExpr *E);
void VisitCXXParenListInitExpr(CXXParenListInitExpr *E);
void VisitCXXParenListOrInitListExpr(Expr *ExprToVisit, ArrayRef<Expr *> Args,
Expr *ArrayFiller);
void EmitInitializationToLValue(Expr *E, LValue Address);
void EmitNullInitializationToLValue(LValue Address);
// case Expr::ChooseExprClass:
void VisitCXXThrowExpr(const CXXThrowExpr *E) { CGF.EmitCXXThrowExpr(E); }
void VisitAtomicExpr(AtomicExpr *E) {
RValue Res = CGF.EmitAtomicExpr(E);
EmitFinalDestCopy(E->getType(), Res);
}
void VisitPackIndexingExpr(PackIndexingExpr *E) {
Visit(E->getSelectedExpr());
}
};
} // end anonymous namespace.
//===----------------------------------------------------------------------===//
// Utilities
//===----------------------------------------------------------------------===//
/// EmitAggLoadOfLValue - Given an expression with aggregate type that
/// represents an lvalue, this method emits the address of the lvalue,
/// then loads the result into DestPtr.
void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) {
LValue LV = CGF.EmitLValue(E);
// If the type of the l-value is atomic, then do an atomic load.
if (LV.getType()->isAtomicType() || CGF.LValueIsSuitableForInlineAtomic(LV)) {
CGF.EmitAtomicLoad(LV, E->getExprLoc(), Dest);
return;
}
EmitFinalDestCopy(E->getType(), LV);
}
/// True if the given aggregate type requires special GC API calls.
bool AggExprEmitter::TypeRequiresGCollection(QualType T) {
// Only record types have members that might require garbage collection.
const RecordType *RecordTy = T->getAs<RecordType>();
if (!RecordTy) return false;
// Don't mess with non-trivial C++ types.
RecordDecl *Record = RecordTy->getDecl();
if (isa<CXXRecordDecl>(Record) &&
(cast<CXXRecordDecl>(Record)->hasNonTrivialCopyConstructor() ||
!cast<CXXRecordDecl>(Record)->hasTrivialDestructor()))
return false;
// Check whether the type has an object member.
return Record->hasObjectMember();
}
void AggExprEmitter::withReturnValueSlot(
const Expr *E, llvm::function_ref<RValue(ReturnValueSlot)> EmitCall) {
QualType RetTy = E->getType();
bool RequiresDestruction =
!Dest.isExternallyDestructed() &&
RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct;
// If it makes no observable difference, save a memcpy + temporary.
//
// We need to always provide our own temporary if destruction is required.
// Otherwise, EmitCall will emit its own, notice that it's "unused", and end
// its lifetime before we have the chance to emit a proper destructor call.
bool UseTemp = Dest.isPotentiallyAliased() || Dest.requiresGCollection() ||
(RequiresDestruction && Dest.isIgnored());
Address RetAddr = Address::invalid();
RawAddress RetAllocaAddr = RawAddress::invalid();
EHScopeStack::stable_iterator LifetimeEndBlock;
llvm::Value *LifetimeSizePtr = nullptr;
llvm::IntrinsicInst *LifetimeStartInst = nullptr;
if (!UseTemp) {
RetAddr = Dest.getAddress();
} else {
RetAddr = CGF.CreateMemTemp(RetTy, "tmp", &RetAllocaAddr);
llvm::TypeSize Size =
CGF.CGM.getDataLayout().getTypeAllocSize(CGF.ConvertTypeForMem(RetTy));
LifetimeSizePtr = CGF.EmitLifetimeStart(Size, RetAllocaAddr.getPointer());
if (LifetimeSizePtr) {
LifetimeStartInst =
cast<llvm::IntrinsicInst>(std::prev(Builder.GetInsertPoint()));
assert(LifetimeStartInst->getIntrinsicID() ==
llvm::Intrinsic::lifetime_start &&
"Last insertion wasn't a lifetime.start?");
CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>(
NormalEHLifetimeMarker, RetAllocaAddr, LifetimeSizePtr);
LifetimeEndBlock = CGF.EHStack.stable_begin();
}
}
RValue Src =
EmitCall(ReturnValueSlot(RetAddr, Dest.isVolatile(), IsResultUnused,
Dest.isExternallyDestructed()));
if (!UseTemp)
return;
assert(Dest.isIgnored() || Dest.emitRawPointer(CGF) !=
Src.getAggregatePointer(E->getType(), CGF));
EmitFinalDestCopy(E->getType(), Src);
if (!RequiresDestruction && LifetimeStartInst) {
// If there's no dtor to run, the copy was the last use of our temporary.
// Since we're not guaranteed to be in an ExprWithCleanups, clean up
// eagerly.
CGF.DeactivateCleanupBlock(LifetimeEndBlock, LifetimeStartInst);
CGF.EmitLifetimeEnd(LifetimeSizePtr, RetAllocaAddr.getPointer());
}
}
/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
void AggExprEmitter::EmitFinalDestCopy(QualType type, RValue src) {
assert(src.isAggregate() && "value must be aggregate value!");
LValue srcLV = CGF.MakeAddrLValue(src.getAggregateAddress(), type);
EmitFinalDestCopy(type, srcLV, CodeGenFunction::EVK_RValue);
}
/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired.
void AggExprEmitter::EmitFinalDestCopy(
QualType type, const LValue &src,
CodeGenFunction::ExprValueKind SrcValueKind) {
// If Dest is ignored, then we're evaluating an aggregate expression
// in a context that doesn't care about the result. Note that loads
// from volatile l-values force the existence of a non-ignored
// destination.
if (Dest.isIgnored())
return;
// Copy non-trivial C structs here.
LValue DstLV = CGF.MakeAddrLValue(
Dest.getAddress(), Dest.isVolatile() ? type.withVolatile() : type);
if (SrcValueKind == CodeGenFunction::EVK_RValue) {
if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct) {
if (Dest.isPotentiallyAliased())
CGF.callCStructMoveAssignmentOperator(DstLV, src);
else
CGF.callCStructMoveConstructor(DstLV, src);
return;
}
} else {
if (type.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct) {
if (Dest.isPotentiallyAliased())
CGF.callCStructCopyAssignmentOperator(DstLV, src);
else
CGF.callCStructCopyConstructor(DstLV, src);
return;
}
}
AggValueSlot srcAgg = AggValueSlot::forLValue(
src, AggValueSlot::IsDestructed, needsGC(type), AggValueSlot::IsAliased,
AggValueSlot::MayOverlap);
EmitCopy(type, Dest, srcAgg);
}
/// Perform a copy from the source into the destination.
///
/// \param type - the type of the aggregate being copied; qualifiers are
/// ignored
void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest,
const AggValueSlot &src) {
if (dest.requiresGCollection()) {
CharUnits sz = dest.getPreferredSize(CGF.getContext(), type);
llvm::Value *size = llvm::ConstantInt::get(CGF.SizeTy, sz.getQuantity());
CGF.CGM.getObjCRuntime().EmitGCMemmoveCollectable(CGF,
dest.getAddress(),
src.getAddress(),
size);
return;
}
// If the result of the assignment is used, copy the LHS there also.
// It's volatile if either side is. Use the minimum alignment of
// the two sides.
LValue DestLV = CGF.MakeAddrLValue(dest.getAddress(), type);
LValue SrcLV = CGF.MakeAddrLValue(src.getAddress(), type);
CGF.EmitAggregateCopy(DestLV, SrcLV, type, dest.mayOverlap(),
dest.isVolatile() || src.isVolatile());
}
/// Emit the initializer for a std::initializer_list initialized with a
/// real initializer list.
void
AggExprEmitter::VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E) {
// Emit an array containing the elements. The array is externally destructed
// if the std::initializer_list object is.
ASTContext &Ctx = CGF.getContext();
LValue Array = CGF.EmitLValue(E->getSubExpr());
assert(Array.isSimple() && "initializer_list array not a simple lvalue");
Address ArrayPtr = Array.getAddress();
const ConstantArrayType *ArrayType =
Ctx.getAsConstantArrayType(E->getSubExpr()->getType());
assert(ArrayType && "std::initializer_list constructed from non-array");
RecordDecl *Record = E->getType()->castAs<RecordType>()->getDecl();
RecordDecl::field_iterator Field = Record->field_begin();
assert(Field != Record->field_end() &&
Ctx.hasSameType(Field->getType()->getPointeeType(),
ArrayType->getElementType()) &&
"Expected std::initializer_list first field to be const E *");
// Start pointer.
AggValueSlot Dest = EnsureSlot(E->getType());
LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
LValue Start = CGF.EmitLValueForFieldInitialization(DestLV, *Field);
llvm::Value *ArrayStart = ArrayPtr.emitRawPointer(CGF);
CGF.EmitStoreThroughLValue(RValue::get(ArrayStart), Start);
++Field;
assert(Field != Record->field_end() &&
"Expected std::initializer_list to have two fields");
llvm::Value *Size = Builder.getInt(ArrayType->getSize());
LValue EndOrLength = CGF.EmitLValueForFieldInitialization(DestLV, *Field);
if (Ctx.hasSameType(Field->getType(), Ctx.getSizeType())) {
// Length.
CGF.EmitStoreThroughLValue(RValue::get(Size), EndOrLength);
} else {
// End pointer.
assert(Field->getType()->isPointerType() &&
Ctx.hasSameType(Field->getType()->getPointeeType(),
ArrayType->getElementType()) &&
"Expected std::initializer_list second field to be const E *");
llvm::Value *Zero = llvm::ConstantInt::get(CGF.PtrDiffTy, 0);
llvm::Value *IdxEnd[] = { Zero, Size };
llvm::Value *ArrayEnd = Builder.CreateInBoundsGEP(
ArrayPtr.getElementType(), ArrayPtr.emitRawPointer(CGF), IdxEnd,
"arrayend");
CGF.EmitStoreThroughLValue(RValue::get(ArrayEnd), EndOrLength);
}
assert(++Field == Record->field_end() &&
"Expected std::initializer_list to only have two fields");
}
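// Worked sketch: for `std::initializer_list<int> il = {1, 2, 3};` the
// backing array's start pointer is stored into the first field, and the
// second field receives either the length 3 or a past-the-end pointer,
// depending on which representation the library's initializer_list uses.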
/// Determine if E is a trivial array filler, that is, one that is
/// equivalent to zero-initialization.
static bool isTrivialFiller(Expr *E) {
if (!E)
return true;
if (isa<ImplicitValueInitExpr>(E))
return true;
if (auto *ILE = dyn_cast<InitListExpr>(E)) {
if (ILE->getNumInits())
return false;
return isTrivialFiller(ILE->getArrayFiller());
}
if (auto *Cons = dyn_cast_or_null<CXXConstructExpr>(E))
return Cons->getConstructor()->isDefaultConstructor() &&
Cons->getConstructor()->isTrivial();
// FIXME: Are there other cases where we can avoid emitting an initializer?
return false;
}
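// Informal examples: the filler for `int a[8] = {1, 2};` is an implicit
// zero-initialization and hence trivial, while a filler invoking a
// non-trivial default constructor is not and must be emitted per element.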
/// Emit initialization of an array from an initializer list. ExprToVisit must
/// be either an InitListExpr or a CXXParenListInitExpr.
void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
QualType ArrayQTy, Expr *ExprToVisit,
ArrayRef<Expr *> Args, Expr *ArrayFiller) {
uint64_t NumInitElements = Args.size();
uint64_t NumArrayElements = AType->getNumElements();
for (const auto *Init : Args) {
if (const auto *Embed = dyn_cast<EmbedExpr>(Init->IgnoreParenImpCasts())) {
NumInitElements += Embed->getDataElementCount() - 1;
if (NumInitElements > NumArrayElements) {
NumInitElements = NumArrayElements;
break;
}
}
}
assert(NumInitElements <= NumArrayElements);
QualType elementType =
CGF.getContext().getAsArrayType(ArrayQTy)->getElementType();
CharUnits elementSize = CGF.getContext().getTypeSizeInChars(elementType);
CharUnits elementAlign =
DestPtr.getAlignment().alignmentOfArrayElement(elementSize);
llvm::Type *llvmElementType = CGF.ConvertTypeForMem(elementType);
// Consider initializing the array by copying from a global. For this to be
// more efficient than per-element initialization, the size of the elements
// with explicit initializers should be large enough.
if (NumInitElements * elementSize.getQuantity() > 16 &&
elementType.isTriviallyCopyableType(CGF.getContext())) {
CodeGen::CodeGenModule &CGM = CGF.CGM;
ConstantEmitter Emitter(CGF);
QualType GVArrayQTy = CGM.getContext().getAddrSpaceQualType(
CGM.getContext().removeAddrSpaceQualType(ArrayQTy),
CGM.GetGlobalConstantAddressSpace());
LangAS AS = GVArrayQTy.getAddressSpace();
if (llvm::Constant *C =
Emitter.tryEmitForInitializer(ExprToVisit, AS, GVArrayQTy)) {
auto GV = new llvm::GlobalVariable(
CGM.getModule(), C->getType(),
/* isConstant= */ true, llvm::GlobalValue::PrivateLinkage, C,
"constinit",
/* InsertBefore= */ nullptr, llvm::GlobalVariable::NotThreadLocal,
CGM.getContext().getTargetAddressSpace(AS));
Emitter.finalize(GV);
CharUnits Align = CGM.getContext().getTypeAlignInChars(GVArrayQTy);
GV->setAlignment(Align.getAsAlign());
Address GVAddr(GV, GV->getValueType(), Align);
EmitFinalDestCopy(ArrayQTy, CGF.MakeAddrLValue(GVAddr, GVArrayQTy));
return;
}
}
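// Sketch of the payoff: an initializer such as
//   int a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
// (32 bytes of trivially copyable data) can be emitted as a private
// constant global ("constinit") plus one aggregate copy, rather than as
// eight separate element stores.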
// Exception safety requires us to destroy all the
// already-constructed members if an initializer throws.
// For that, we'll need an EH cleanup.
QualType::DestructionKind dtorKind = elementType.isDestructedType();
Address endOfInit = Address::invalid();
CodeGenFunction::CleanupDeactivationScope deactivation(CGF);
llvm::Value *begin = DestPtr.emitRawPointer(CGF);
if (dtorKind) {
CodeGenFunction::AllocaTrackerRAII allocaTracker(CGF);
// In principle we could tell the cleanup where we are more
// directly, but the control flow can get so varied here that it
// would actually be quite complex. Therefore we go through an
// alloca.
llvm::Instruction *dominatingIP =
Builder.CreateFlagLoad(llvm::ConstantInt::getNullValue(CGF.Int8PtrTy));
endOfInit = CGF.CreateTempAlloca(begin->getType(), CGF.getPointerAlign(),
"arrayinit.endOfInit");
Builder.CreateStore(begin, endOfInit);
CGF.pushIrregularPartialArrayCleanup(begin, endOfInit, elementType,
elementAlign,
CGF.getDestroyer(dtorKind));
cast<EHCleanupScope>(*CGF.EHStack.find(CGF.EHStack.stable_begin()))
.AddAuxAllocas(allocaTracker.Take());
CGF.DeferredDeactivationCleanupStack.push_back(
{CGF.EHStack.stable_begin(), dominatingIP});
}
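// For example, given 'S a[4] = {S(0), S(1), S(2), S(3)};' with a non-trivial
// ~S(), a throw from S(2)'s constructor must destroy exactly a[0] and a[1];
// the cleanup reads the high-water mark back out of "arrayinit.endOfInit" to
// know how far construction got.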
llvm::Value *one = llvm::ConstantInt::get(CGF.SizeTy, 1);
auto Emit = [&](Expr *Init, uint64_t ArrayIndex) {
llvm::Value *element = begin;
if (ArrayIndex > 0) {
element = Builder.CreateInBoundsGEP(
llvmElementType, begin,
llvm::ConstantInt::get(CGF.SizeTy, ArrayIndex), "arrayinit.element");
// Tell the cleanup that it needs to destroy up to this
// element. TODO: some of these stores can be trivially
// observed to be unnecessary.
if (endOfInit.isValid())
Builder.CreateStore(element, endOfInit);
}
LValue elementLV = CGF.MakeAddrLValue(
Address(element, llvmElementType, elementAlign), elementType);
EmitInitializationToLValue(Init, elementLV);
return true;
};
unsigned ArrayIndex = 0;
// Emit the explicit initializers.
for (uint64_t i = 0; i != NumInitElements; ++i) {
if (ArrayIndex >= NumInitElements)
break;
if (auto *EmbedS = dyn_cast<EmbedExpr>(Args[i]->IgnoreParenImpCasts())) {
EmbedS->doForEachDataElement(Emit, ArrayIndex);
} else {
Emit(Args[i], ArrayIndex);
ArrayIndex++;
}
}
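// For example, in 'int a[8] = {1, 2, 3};' only the first three elements have
// explicit initializers; the code below fills the remaining five from the
// array filler (or skips them entirely when emitting into already-zeroed
// memory).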
// Check whether there's a non-trivial array-fill expression.
bool hasTrivialFiller = isTrivialFiller(ArrayFiller);
// Any remaining elements need to be zero-initialized, possibly
// using the filler expression. We can skip this if we're
// emitting to zeroed memory.
if (NumInitElements != NumArrayElements &&
!(Dest.isZeroed() && hasTrivialFiller &&
CGF.getTypes().isZeroInitializable(elementType))) {
// Use an actual loop. This is basically
// do { *array++ = filler; } while (array != end);
// Advance to the start of the rest of the array.
llvm::Value *element = begin;
if (NumInitElements) {
element = Builder.CreateInBoundsGEP(
llvmElementType, element,
llvm::ConstantInt::get(CGF.SizeTy, NumInitElements),
"arrayinit.start");
if (endOfInit.isValid()) Builder.CreateStore(element, endOfInit);
}
// Compute the end of the array.
llvm::Value *end = Builder.CreateInBoundsGEP(
llvmElementType, begin,
llvm::ConstantInt::get(CGF.SizeTy, NumArrayElements), "arrayinit.end");
llvm::BasicBlock *entryBB = Builder.GetInsertBlock();
llvm::BasicBlock *bodyBB = CGF.createBasicBlock("arrayinit.body");
// Jump into the body.
CGF.EmitBlock(bodyBB);
llvm::PHINode *currentElement =
Builder.CreatePHI(element->getType(), 2, "arrayinit.cur");
currentElement->addIncoming(element, entryBB);
// Emit the actual filler expression.
{
// C++1z [class.temporary]p5:
// when a default constructor is called to initialize an element of
// an array with no corresponding initializer [...] the destruction of
// every temporary created in a default argument is sequenced before
// the construction of the next array element, if any
CodeGenFunction::RunCleanupsScope CleanupsScope(CGF);
LValue elementLV = CGF.MakeAddrLValue(
Address(currentElement, llvmElementType, elementAlign), elementType);
if (ArrayFiller)
EmitInitializationToLValue(ArrayFiller, elementLV);
else
EmitNullInitializationToLValue(elementLV);
}
// Move on to the next element.
llvm::Value *nextElement = Builder.CreateInBoundsGEP(
llvmElementType, currentElement, one, "arrayinit.next");
// Tell the EH cleanup that we finished with the last element.
if (endOfInit.isValid()) Builder.CreateStore(nextElement, endOfInit);
// Leave the loop if we're done.
llvm::Value *done = Builder.CreateICmpEQ(nextElement, end,
"arrayinit.done");
llvm::BasicBlock *endBB = CGF.createBasicBlock("arrayinit.end");
Builder.CreateCondBr(done, endBB, bodyBB);
currentElement->addIncoming(nextElement, Builder.GetInsertBlock());
CGF.EmitBlock(endBB);
}
}
//===----------------------------------------------------------------------===//
// Visitor Methods
//===----------------------------------------------------------------------===//
void AggExprEmitter::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E){
Visit(E->getSubExpr());
}
void AggExprEmitter::VisitOpaqueValueExpr(OpaqueValueExpr *e) {
// If this is a unique OVE, just visit its source expression.
if (e->isUnique())
Visit(e->getSourceExpr());
else
EmitFinalDestCopy(e->getType(), CGF.getOrCreateOpaqueLValueMapping(e));
}
void
AggExprEmitter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) {
if (Dest.isPotentiallyAliased() &&
E->getType().isPODType(CGF.getContext())) {
// For a POD type, just emit a load of the lvalue + a copy, because our
// compound literal might alias the destination.
EmitAggLoadOfLValue(E);
return;
}
AggValueSlot Slot = EnsureSlot(E->getType());
// Block-scope compound literals are destroyed at the end of the enclosing
// scope in C.
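// (A compound literal at block scope has automatic storage duration
// associated with the enclosing block, so a literal whose type has a
// nontrivial destruction kind gets a scope-exit cleanup pushed below.)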
bool Destruct =
!CGF.getLangOpts().CPlusPlus && !Slot.isExternallyDestructed();
if (Destruct)
Slot.setExternallyDestructed();
CGF.EmitAggExpr(E->getInitializer(), Slot);
if (Destruct)
if (QualType::DestructionKind DtorKind = E->getType().isDestructedType())
CGF.pushLifetimeExtendedDestroy(
CGF.getCleanupKind(DtorKind), Slot.getAddress(), E->getType(),
CGF.getDestroyer(DtorKind), DtorKind & EHCleanup);
}
/// Attempt to look through various unimportant expressions to find a
/// cast of the given kind.
static Expr *findPeephole(Expr *op, CastKind kind, const ASTContext &ctx) {
op = op->IgnoreParenNoopCasts(ctx);
if (auto castE = dyn_cast<CastExpr>(op)) {
if (castE->getCastKind() == kind)
return castE->getSubExpr();
}
return nullptr;
}
void AggExprEmitter::VisitCastExpr(CastExpr *E) {
if (const auto *ECE = dyn_cast<ExplicitCastExpr>(E))
CGF.CGM.EmitExplicitCastExprType(ECE, &CGF);
switch (E->getCastKind()) {
case CK_Dynamic: {
// FIXME: Can this actually happen? We have no test coverage for it.
assert(isa<CXXDynamicCastExpr>(E) && "CK_Dynamic without a dynamic_cast?");
LValue LV = CGF.EmitCheckedLValue(E->getSubExpr(),
CodeGenFunction::TCK_Load);
// FIXME: Do we also need to handle property references here?
if (LV.isSimple())
CGF.EmitDynamicCast(LV.getAddress(), cast<CXXDynamicCastExpr>(E));
else
CGF.CGM.ErrorUnsupported(E, "non-simple lvalue dynamic_cast");
if (!Dest.isIgnored())
CGF.CGM.ErrorUnsupported(E, "lvalue dynamic_cast with a destination");
break;
}
case CK_ToUnion: {
// Evaluate even if the destination is ignored.
if (Dest.isIgnored()) {
CGF.EmitAnyExpr(E->getSubExpr(), AggValueSlot::ignored(),
/*ignoreResult=*/true);
break;
}
// GCC union extension
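// e.g. '(union U)x' initializes, in place in the destination slot, the
// union member whose type matches the operand.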
QualType Ty = E->getSubExpr()->getType();
Address CastPtr = Dest.getAddress().withElementType(CGF.ConvertType(Ty));
EmitInitializationToLValue(E->getSubExpr(),
CGF.MakeAddrLValue(CastPtr, Ty));
break;
}
case CK_LValueToRValueBitCast: {
if (Dest.isIgnored()) {
CGF.EmitAnyExpr(E->getSubExpr(), AggValueSlot::ignored(),
/*ignoreResult=*/true);
break;
}
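// This is '__builtin_bit_cast' (and std::bit_cast) applied to an aggregate:
// copy the object representation byte-for-byte from the source lvalue.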
LValue SourceLV = CGF.EmitLValue(E->getSubExpr());
Address SourceAddress = SourceLV.getAddress().withElementType(CGF.Int8Ty);
Address DestAddress = Dest.getAddress().withElementType(CGF.Int8Ty);
llvm::Value *SizeVal = llvm::ConstantInt::get(
CGF.SizeTy,
CGF.getContext().getTypeSizeInChars(E->getType()).getQuantity());
Builder.CreateMemCpy(DestAddress, SourceAddress, SizeVal);
break;
}
case CK_DerivedToBase:
case CK_BaseToDerived:
case CK_UncheckedDerivedToBase: {
llvm_unreachable("cannot perform hierarchy conversion in EmitAggExpr: "
"should have been unpacked before we got here");
}
case CK_NonAtomicToAtomic:
case CK_AtomicToNonAtomic: {
bool isToAtomic = (E->getCastKind() == CK_NonAtomicToAtomic);
// Determine the atomic and value types.
QualType atomicType = E->getSubExpr()->getType();
QualType valueType = E->getType();
if (isToAtomic) std::swap(atomicType, valueType);
assert(atomicType->isAtomicType());
assert(CGF.getContext().hasSameUnqualifiedType(valueType,
atomicType->castAs<AtomicType>()->getValueType()));
// Just recurse normally if we're ignoring the result or the
// atomic type doesn't change representation.
if (Dest.isIgnored() || !CGF.CGM.isPaddedAtomicType(atomicType)) {
return Visit(E->getSubExpr());
}
CastKind peepholeTarget =
(isToAtomic ? CK_AtomicToNonAtomic : CK_NonAtomicToAtomic);
// These two cases are reverses of each other; try to peephole them.
if (Expr *op =
findPeephole(E->getSubExpr(), peepholeTarget, CGF.getContext())) {
assert(CGF.getContext().hasSameUnqualifiedType(op->getType(),
E->getType()) &&
"peephole significantly changed types?");
return Visit(op);
}
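// Padding example: if the target rounds '_Atomic T' up to a power-of-two
// size, say a 6-byte T padded to 8 bytes, the value occupies only a leading
// subobject of the atomic representation, which the code below addresses
// with a struct GEP to field 0.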
// If we're converting an r-value of non-atomic type to an r-value
// of atomic type, just emit directly into the relevant sub-object.
if (isToAtomic) {
AggValueSlot valueDest = Dest;
if (!valueDest.isIgnored() && CGF.CGM.isPaddedAtomicType(atomicType)) {
// Zero-initialize. (Strictly speaking, we only need to initialize
// the padding at the end, but this is simpler.)
if (!Dest.isZeroed())
CGF.EmitNullInitialization(Dest.getAddress(), atomicType);
// Build a GEP to refer to the subobject.
Address valueAddr =
CGF.Builder.CreateStructGEP(valueDest.getAddress(), 0);
valueDest = AggValueSlot::forAddr(valueAddr,
valueDest.getQualifiers(),
valueDest.isExternallyDestructed(),
valueDest.requiresGCollection(),
valueDest.isPotentiallyAliased(),
AggValueSlot::DoesNotOverlap,
AggValueSlot::IsZeroed);
}
CGF.EmitAggExpr(E->getSubExpr(), valueDest);
return;
}
// Otherwise, we're converting an atomic type to a non-atomic type.
// Make an atomic temporary, emit into that, and then copy the value out.
AggValueSlot atomicSlot =
CGF.CreateAggTemp(atomicType, "atomic-to-nonatomic.temp");
CGF.EmitAggExpr(E->getSubExpr(), atomicSlot);
Address valueAddr = Builder.CreateStructGEP(atomicSlot.getAddress(), 0);
RValue rvalue = RValue::getAggregate(valueAddr, atomicSlot.isVolatile());
return EmitFinalDestCopy(valueType, rvalue);
}
case CK_AddressSpaceConversion:
return Visit(E->getSubExpr());
case CK_LValueToRValue:
// If we're loading from a volatile type, force the destination
// into existence.
if (E->getSubExpr()->getType().isVolatileQualified()) {
bool Destruct =
!Dest.isExternallyDestructed() &&
E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct;
if (Destruct)
Dest.setExternallyDestructed();
EnsureDest(E->getType());
Visit(E->getSubExpr());
if (Destruct)
CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Dest.getAddress(),
E->getType());
return;
}
[[fallthrough]];
case CK_HLSLArrayRValue:
Visit(E->getSubExpr());
break;
case CK_NoOp:
case CK_UserDefinedConversion:
case CK_ConstructorConversion:
assert(CGF.getContext().hasSameUnqualifiedType(E->getSubExpr()->getType(),
E->getType()) &&
"Implicit cast types must be compatible");
Visit(E->getSubExpr());
break;
case CK_LValueBitCast:
llvm_unreachable("should not be emitting lvalue bitcast as rvalue");
case CK_Dependent:
case CK_BitCast:
case CK_ArrayToPointerDecay:
case CK_FunctionToPointerDecay:
case CK_NullToPointer:
case CK_NullToMemberPointer:
case CK_BaseToDerivedMemberPointer:
case CK_DerivedToBaseMemberPointer:
case CK_MemberPointerToBoolean:
case CK_ReinterpretMemberPointer:
case CK_IntegralToPointer:
case CK_PointerToIntegral:
case CK_PointerToBoolean:
case CK_ToVoid:
case CK_VectorSplat:
case CK_IntegralCast:
case CK_BooleanToSignedIntegral:
case CK_IntegralToBoolean:
case CK_IntegralToFloating:
case CK_FloatingToIntegral:
case CK_FloatingToBoolean:
case CK_FloatingCast:
case CK_CPointerToObjCPointerCast:
case CK_BlockPointerToObjCPointerCast:
case CK_AnyPointerToBlockPointerCast:
case CK_ObjCObjectLValueCast:
case CK_FloatingRealToComplex:
case CK_FloatingComplexToReal:
case CK_FloatingComplexToBoolean:
case CK_FloatingComplexCast:
case CK_FloatingComplexToIntegralComplex:
case CK_IntegralRealToComplex:
case CK_IntegralComplexToReal:
case CK_IntegralComplexToBoolean:
case CK_IntegralComplexCast:
case CK_IntegralComplexToFloatingComplex:
case CK_ARCProduceObject:
case CK_ARCConsumeObject:
case CK_ARCReclaimReturnedObject:
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
case CK_BuiltinFnToFnPtr:
case CK_ZeroToOCLOpaqueType:
case CK_MatrixCast:
case CK_HLSLVectorTruncation:
case CK_IntToOCLSampler:
case CK_FloatingToFixedPoint:
case CK_FixedPointToFloating:
case CK_FixedPointCast:
case CK_FixedPointToBoolean:
case CK_FixedPointToIntegral:
case CK_IntegralToFixedPoint:
llvm_unreachable("cast kind invalid for aggregate types");
}
}
void AggExprEmitter::VisitCallExpr(const CallExpr *E) {
if (E->getCallReturnType(CGF.getContext())->isReferenceType()) {
EmitAggLoadOfLValue(E);
return;
}
withReturnValueSlot(E, [&](ReturnValueSlot Slot) {
return CGF.EmitCallExpr(E, Slot);
});
}
void AggExprEmitter::VisitObjCMessageExpr(ObjCMessageExpr *E) {
withReturnValueSlot(E, [&](ReturnValueSlot Slot) {
return CGF.EmitObjCMessageExpr(E, Slot);
});
}
void AggExprEmitter::VisitBinComma(const BinaryOperator *E) {
CGF.EmitIgnoredExpr(E->getLHS());
Visit(E->getRHS());
}
void AggExprEmitter::VisitStmtExpr(const StmtExpr *E) {
CodeGenFunction::StmtExprEvaluation eval(CGF);
CGF.EmitCompoundStmt(*E->getSubStmt(), true, Dest);
}
enum CompareKind {
CK_Less,
CK_Greater,
CK_Equal,
};
static llvm::Value *EmitCompare(CGBuilderTy &Builder, CodeGenFunction &CGF,
const BinaryOperator *E, llvm::Value *LHS,
llvm::Value *RHS, CompareKind Kind,
const char *NameSuffix = "") {
QualType ArgTy = E->getLHS()->getType();
if (const ComplexType *CT = ArgTy->getAs<ComplexType>())
ArgTy = CT->getElementType();
if (const auto *MPT = ArgTy->getAs<MemberPointerType>()) {
assert(Kind == CK_Equal &&
"member pointers may only be compared for equality");
return CGF.CGM.getCXXABI().EmitMemberPointerComparison(
CGF, LHS, RHS, MPT, /*IsInequality*/ false);
}
// Compute the comparison instructions for the specified comparison kind.
struct CmpInstInfo {
const char *Name;
llvm::CmpInst::Predicate FCmp;
llvm::CmpInst::Predicate SCmp;
llvm::CmpInst::Predicate UCmp;
};
CmpInstInfo InstInfo = [&]() -> CmpInstInfo {
using FI = llvm::FCmpInst;
using II = llvm::ICmpInst;
switch (Kind) {
case CK_Less:
return {"cmp.lt", FI::FCMP_OLT, II::ICMP_SLT, II::ICMP_ULT};
case CK_Greater:
return {"cmp.gt", FI::FCMP_OGT, II::ICMP_SGT, II::ICMP_UGT};
case CK_Equal:
return {"cmp.eq", FI::FCMP_OEQ, II::ICMP_EQ, II::ICMP_EQ};
}
llvm_unreachable("Unrecognised CompareKind enum");
}();
if (ArgTy->hasFloatingRepresentation())
return Builder.CreateFCmp(InstInfo.FCmp, LHS, RHS,
llvm::Twine(InstInfo.Name) + NameSuffix);
if (ArgTy->isIntegralOrEnumerationType() || ArgTy->isPointerType()) {
auto Inst =
ArgTy->hasSignedIntegerRepresentation() ? InstInfo.SCmp : InstInfo.UCmp;
return Builder.CreateICmp(Inst, LHS, RHS,
llvm::Twine(InstInfo.Name) + NameSuffix);
}
llvm_unreachable("unsupported aggregate binary expression should have "
"already been handled");
}
void AggExprEmitter::VisitBinCmp(const BinaryOperator *E) {
using llvm::BasicBlock;
using llvm::PHINode;
using llvm::Value;
assert(CGF.getContext().hasSameType(E->getLHS()->getType(),
E->getRHS()->getType()));
const ComparisonCategoryInfo &CmpInfo =
CGF.getContext().CompCategories.getInfoForType(E->getType());
assert(CmpInfo.Record->isTriviallyCopyable() &&
"cannot copy non-trivially copyable aggregate");
QualType ArgTy = E->getLHS()->getType();
if (!ArgTy->isIntegralOrEnumerationType() && !ArgTy->isRealFloatingType() &&
!ArgTy->isNullPtrType() && !ArgTy->isPointerType() &&
!ArgTy->isMemberPointerType() && !ArgTy->isAnyComplexType()) {
return CGF.ErrorUnsupported(E, "aggregate three-way comparison");
}
bool IsComplex = ArgTy->isAnyComplexType();
// Evaluate the operands to the expression and extract their values.
auto EmitOperand = [&](Expr *E) -> std::pair<Value *, Value *> {
RValue RV = CGF.EmitAnyExpr(E);
if (RV.isScalar())
return {RV.getScalarVal(), nullptr};
if (RV.isAggregate())
return {RV.getAggregatePointer(E->getType(), CGF), nullptr};
assert(RV.isComplex());
return RV.getComplexVal();
};
auto LHSValues = EmitOperand(E->getLHS()),
RHSValues = EmitOperand(E->getRHS());
auto EmitCmp = [&](CompareKind K) {
Value *Cmp = EmitCompare(Builder, CGF, E, LHSValues.first, RHSValues.first,
K, IsComplex ? ".r" : "");
if (!IsComplex)
return Cmp;
assert(K == CompareKind::CK_Equal);
Value *CmpImag = EmitCompare(Builder, CGF, E, LHSValues.second,
RHSValues.second, K, ".i");
return Builder.CreateAnd(Cmp, CmpImag, "and.eq");
};
auto EmitCmpRes = [&](const ComparisonCategoryInfo::ValueInfo *VInfo) {
return Builder.getInt(VInfo->getIntValue());
};
Value *Select;
if (ArgTy->isNullPtrType()) {
Select = EmitCmpRes(CmpInfo.getEqualOrEquiv());
} else if (!CmpInfo.isPartial()) {
Value *SelectOne =
Builder.CreateSelect(EmitCmp(CK_Less), EmitCmpRes(CmpInfo.getLess()),
EmitCmpRes(CmpInfo.getGreater()), "sel.lt");
Select = Builder.CreateSelect(EmitCmp(CK_Equal),
EmitCmpRes(CmpInfo.getEqualOrEquiv()),
SelectOne, "sel.eq");
} else {
Value *SelectEq = Builder.CreateSelect(
EmitCmp(CK_Equal), EmitCmpRes(CmpInfo.getEqualOrEquiv()),
EmitCmpRes(CmpInfo.getUnordered()), "sel.eq");
Value *SelectGT = Builder.CreateSelect(EmitCmp(CK_Greater),
EmitCmpRes(CmpInfo.getGreater()),
SelectEq, "sel.gt");
Select = Builder.CreateSelect(
EmitCmp(CK_Less), EmitCmpRes(CmpInfo.getLess()), SelectGT, "sel.lt");
}
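// For a total ordering the result is, in pseudo-IR:
//   %sel.lt = select (lhs < rhs), less, greater
//   %sel.eq = select (lhs == rhs), equal_or_equiv, %sel.lt
// and a partial ordering additionally folds in the unordered value.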
// Create the return value in the destination slot.
EnsureDest(E->getType());
LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
// Emit the address of the first (and only) field in the comparison category
// type, and initialize it from the constant integer value selected above.
LValue FieldLV = CGF.EmitLValueForFieldInitialization(
DestLV, *CmpInfo.Record->field_begin());
CGF.EmitStoreThroughLValue(RValue::get(Select), FieldLV, /*IsInit*/ true);
// All done! The result is in the Dest slot.
}
void AggExprEmitter::VisitBinaryOperator(const BinaryOperator *E) {
if (E->getOpcode() == BO_PtrMemD || E->getOpcode() == BO_PtrMemI)
VisitPointerToDataMemberBinaryOperator(E);
else
CGF.ErrorUnsupported(E, "aggregate binary expression");
}
void AggExprEmitter::VisitPointerToDataMemberBinaryOperator(
const BinaryOperator *E) {
LValue LV = CGF.EmitPointerToDataMemberBinaryExpr(E);
EmitFinalDestCopy(E->getType(), LV);
}
/// Is the value of the given expression possibly a reference to or
/// into a __block variable?
static bool isBlockVarRef(const Expr *E) {
// Make sure we look through parens.
E = E->IgnoreParens();
// Check for a direct reference to a __block variable.
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
const VarDecl *var = dyn_cast<VarDecl>(DRE->getDecl());
return (var && var->hasAttr<BlocksAttr>());
}
// More complicated stuff.
// Binary operators.
if (const BinaryOperator *op = dyn_cast<BinaryOperator>(E)) {
// For an assignment or pointer-to-member operation, just care
// about the LHS.
if (op->isAssignmentOp() || op->isPtrMemOp())
return isBlockVarRef(op->getLHS());
// For a comma, just care about the RHS.
if (op->getOpcode() == BO_Comma)
return isBlockVarRef(op->getRHS());
// FIXME: pointer arithmetic?
return false;
// Check both sides of a conditional operator.
} else if (const AbstractConditionalOperator *op
= dyn_cast<AbstractConditionalOperator>(E)) {
return isBlockVarRef(op->getTrueExpr())
|| isBlockVarRef(op->getFalseExpr());
// OVEs are required to support BinaryConditionalOperators.
} else if (const OpaqueValueExpr *op
= dyn_cast<OpaqueValueExpr>(E)) {
if (const Expr *src = op->getSourceExpr())
return isBlockVarRef(src);
// Casts are necessary to get things like (*(int*)&var) = foo().
// We don't really care about the kind of cast here, except
// we don't want to look through l2r casts, because it's okay
// to get the *value* in a __block variable.
} else if (const CastExpr *cast = dyn_cast<CastExpr>(E)) {
if (cast->getCastKind() == CK_LValueToRValue)
return false;
return isBlockVarRef(cast->getSubExpr());
// Handle unary operators. Again, just aggressively look through
// it, ignoring the operation.
} else if (const UnaryOperator *uop = dyn_cast<UnaryOperator>(E)) {
return isBlockVarRef(uop->getSubExpr());
// Look into the base of a field access.
} else if (const MemberExpr *mem = dyn_cast<MemberExpr>(E)) {
return isBlockVarRef(mem->getBase());
// Look into the base of a subscript.
} else if (const ArraySubscriptExpr *sub = dyn_cast<ArraySubscriptExpr>(E)) {
return isBlockVarRef(sub->getBase());
}
return false;
}
void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) {
// For an assignment to work, the value on the right has
// to be compatible with the value on the left.
assert(CGF.getContext().hasSameUnqualifiedType(E->getLHS()->getType(),
E->getRHS()->getType())
&& "Invalid assignment");
// If the LHS might be a __block variable, and the RHS can
// potentially cause a block copy, we need to evaluate the RHS first
// so that the assignment goes the right place.
// This is pretty semantically fragile.
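// For example, with '__block S s; s = makeS();' (makeS illustrative), if
// evaluating the RHS copies a block capturing 's', the variable moves to the
// heap, so the LHS address must be computed after the RHS side effects.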
if (isBlockVarRef(E->getLHS()) &&
E->getRHS()->HasSideEffects(CGF.getContext())) {
// Ensure that we have a destination, and evaluate the RHS into that.
EnsureDest(E->getRHS()->getType());
Visit(E->getRHS());
// Now emit the LHS and copy into it.
LValue LHS = CGF.EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
// That copy is an atomic copy if the LHS is atomic.
if (LHS.getType()->isAtomicType() ||
CGF.LValueIsSuitableForInlineAtomic(LHS)) {
CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false);
return;
}
EmitCopy(E->getLHS()->getType(),
AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed,
needsGC(E->getLHS()->getType()),
AggValueSlot::IsAliased,
AggValueSlot::MayOverlap),
Dest);
return;
}
LValue LHS = CGF.EmitLValue(E->getLHS());
// If we have an atomic type, evaluate into the destination and then
// do an atomic copy.
if (LHS.getType()->isAtomicType() ||
CGF.LValueIsSuitableForInlineAtomic(LHS)) {
EnsureDest(E->getRHS()->getType());
Visit(E->getRHS());
CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false);
return;
}
// Codegen the RHS so that it stores directly into the LHS.
AggValueSlot LHSSlot = AggValueSlot::forLValue(
LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()),
AggValueSlot::IsAliased, AggValueSlot::MayOverlap);
// A non-volatile aggregate destination might have a volatile member.
if (!LHSSlot.isVolatile() &&
CGF.hasVolatileMember(E->getLHS()->getType()))
LHSSlot.setVolatile(true);
CGF.EmitAggExpr(E->getRHS(), LHSSlot);
// Copy into the destination if the assignment isn't ignored.
EmitFinalDestCopy(E->getType(), LHS);
if (!Dest.isIgnored() && !Dest.isExternallyDestructed() &&
E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct)
CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Dest.getAddress(),
E->getType());
}
void AggExprEmitter::
VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
llvm::BasicBlock *LHSBlock = CGF.createBasicBlock("cond.true");
llvm::BasicBlock *RHSBlock = CGF.createBasicBlock("cond.false");
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end");
// Bind the common expression if necessary.
CodeGenFunction::OpaqueValueMapping binding(CGF, E);
CodeGenFunction::ConditionalEvaluation eval(CGF);
CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock,
CGF.getProfileCount(E));
// Save whether the destination's lifetime is externally managed.
bool isExternallyDestructed = Dest.isExternallyDestructed();
bool destructNonTrivialCStruct =
!isExternallyDestructed &&
E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct;
isExternallyDestructed |= destructNonTrivialCStruct;
Dest.setExternallyDestructed(isExternallyDestructed);
eval.begin(CGF);
CGF.EmitBlock(LHSBlock);
if (llvm::EnableSingleByteCoverage)
CGF.incrementProfileCounter(E->getTrueExpr());
else
CGF.incrementProfileCounter(E);
Visit(E->getTrueExpr());
eval.end(CGF);
assert(CGF.HaveInsertPoint() && "expression evaluation ended with no IP!");
CGF.Builder.CreateBr(ContBlock);
// If the result of an agg expression is unused, then the emission
// of the LHS might need to create a destination slot. That's fine
// with us, and we can safely emit the RHS into the same slot, but
// we shouldn't claim that it's already being destructed.
Dest.setExternallyDestructed(isExternallyDestructed);
eval.begin(CGF);
CGF.EmitBlock(RHSBlock);
if (llvm::EnableSingleByteCoverage)
CGF.incrementProfileCounter(E->getFalseExpr());
Visit(E->getFalseExpr());
eval.end(CGF);
if (destructNonTrivialCStruct)
CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Dest.getAddress(),
E->getType());
CGF.EmitBlock(ContBlock);
if (llvm::EnableSingleByteCoverage)
CGF.incrementProfileCounter(E);
}
void AggExprEmitter::VisitChooseExpr(const ChooseExpr *CE) {
Visit(CE->getChosenSubExpr());
}
void AggExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
Address ArgValue = Address::invalid();
CGF.EmitVAArg(VE, ArgValue, Dest);
// If EmitVAArg fails, emit an error.
if (!ArgValue.isValid()) {
CGF.ErrorUnsupported(VE, "aggregate va_arg expression");
return;
}
}
void AggExprEmitter::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
// Ensure that we have a slot, but if we already do, remember
// whether it was externally destructed.
bool wasExternallyDestructed = Dest.isExternallyDestructed();
EnsureDest(E->getType());
// We're going to push a destructor if there isn't already one.
Dest.setExternallyDestructed();
Visit(E->getSubExpr());
// Push that destructor we promised.
if (!wasExternallyDestructed)
CGF.EmitCXXTemporary(E->getTemporary(), E->getType(), Dest.getAddress());
}
void
AggExprEmitter::VisitCXXConstructExpr(const CXXConstructExpr *E) {
AggValueSlot Slot = EnsureSlot(E->getType());
CGF.EmitCXXConstructExpr(E, Slot);
}
void AggExprEmitter::VisitCXXInheritedCtorInitExpr(
const CXXInheritedCtorInitExpr *E) {
AggValueSlot Slot = EnsureSlot(E->getType());
CGF.EmitInheritedCXXConstructorCall(
E->getConstructor(), E->constructsVBase(), Slot.getAddress(),
E->inheritedFromVBase(), E);
}
void
AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) {
AggValueSlot Slot = EnsureSlot(E->getType());
LValue SlotLV = CGF.MakeAddrLValue(Slot.getAddress(), E->getType());
// We'll need to enter cleanup scopes in case any of the element
// initializers throws an exception or contains a branch out of the expression.
CodeGenFunction::CleanupDeactivationScope scope(CGF);
CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin();
for (LambdaExpr::const_capture_init_iterator i = E->capture_init_begin(),
e = E->capture_init_end();
i != e; ++i, ++CurField) {
// Emit initialization
LValue LV = CGF.EmitLValueForFieldInitialization(SlotLV, *CurField);
if (CurField->hasCapturedVLAType()) {
CGF.EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV);
continue;
}
EmitInitializationToLValue(*i, LV);
// Push a destructor if necessary.
if (QualType::DestructionKind DtorKind =
CurField->getType().isDestructedType()) {
assert(LV.isSimple());
if (DtorKind)
CGF.pushDestroyAndDeferDeactivation(NormalAndEHCleanup, LV.getAddress(),
CurField->getType(),
CGF.getDestroyer(DtorKind), false);
}
}
}
void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) {
CodeGenFunction::RunCleanupsScope cleanups(CGF);
Visit(E->getSubExpr());
}
void AggExprEmitter::VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) {
QualType T = E->getType();
AggValueSlot Slot = EnsureSlot(T);
EmitNullInitializationToLValue(CGF.MakeAddrLValue(Slot.getAddress(), T));
}
void AggExprEmitter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
QualType T = E->getType();
AggValueSlot Slot = EnsureSlot(T);
EmitNullInitializationToLValue(CGF.MakeAddrLValue(Slot.getAddress(), T));
}
/// Determine whether the given cast kind is known to always convert values
/// with all zero bits in their value representation to values with all zero
/// bits in their value representation.
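/// For example, '(float)0' still has an all-zero value representation, while
/// '(void *)0' need not on a target where the null pointer is not
/// all-zero bits.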
static bool castPreservesZero(const CastExpr *CE) {
switch (CE->getCastKind()) {
// No-ops.
case CK_NoOp:
case CK_UserDefinedConversion:
case CK_ConstructorConversion:
case CK_BitCast:
case CK_ToUnion:
case CK_ToVoid:
// Conversions between (possibly-complex) integral, (possibly-complex)
// floating-point, and bool.
case CK_BooleanToSignedIntegral:
case CK_FloatingCast:
case CK_FloatingComplexCast:
case CK_FloatingComplexToBoolean:
case CK_FloatingComplexToIntegralComplex:
case CK_FloatingComplexToReal:
case CK_FloatingRealToComplex:
case CK_FloatingToBoolean:
case CK_FloatingToIntegral:
case CK_IntegralCast:
case CK_IntegralComplexCast:
case CK_IntegralComplexToBoolean:
case CK_IntegralComplexToFloatingComplex:
case CK_IntegralComplexToReal:
case CK_IntegralRealToComplex:
case CK_IntegralToBoolean:
case CK_IntegralToFloating:
// Reinterpreting integers as pointers and vice versa.
case CK_IntegralToPointer:
case CK_PointerToIntegral:
// Language extensions.
case CK_VectorSplat:
case CK_MatrixCast:
case CK_NonAtomicToAtomic:
case CK_AtomicToNonAtomic:
case CK_HLSLVectorTruncation:
return true;
case CK_BaseToDerivedMemberPointer:
case CK_DerivedToBaseMemberPointer:
case CK_MemberPointerToBoolean:
case CK_NullToMemberPointer:
case CK_ReinterpretMemberPointer:
// FIXME: ABI-dependent.
return false;
case CK_AnyPointerToBlockPointerCast:
case CK_BlockPointerToObjCPointerCast:
case CK_CPointerToObjCPointerCast:
case CK_ObjCObjectLValueCast:
case CK_IntToOCLSampler:
case CK_ZeroToOCLOpaqueType:
// FIXME: Check these.
return false;
case CK_FixedPointCast:
case CK_FixedPointToBoolean:
case CK_FixedPointToFloating:
case CK_FixedPointToIntegral:
case CK_FloatingToFixedPoint:
case CK_IntegralToFixedPoint:
// FIXME: Do all fixed-point types represent zero as all 0 bits?
return false;
case CK_AddressSpaceConversion:
case CK_BaseToDerived:
case CK_DerivedToBase:
case CK_Dynamic:
case CK_NullToPointer:
case CK_PointerToBoolean:
// FIXME: Preserves zeroes only if zero pointers and null pointers have the
// same representation in all involved address spaces.
return false;
case CK_ARCConsumeObject:
case CK_ARCExtendBlockObject:
case CK_ARCProduceObject:
case CK_ARCReclaimReturnedObject:
case CK_CopyAndAutoreleaseBlockObject:
case CK_ArrayToPointerDecay:
case CK_FunctionToPointerDecay:
case CK_BuiltinFnToFnPtr:
case CK_Dependent:
case CK_LValueBitCast:
case CK_LValueToRValue:
case CK_LValueToRValueBitCast:
case CK_UncheckedDerivedToBase:
case CK_HLSLArrayRValue:
return false;
}
llvm_unreachable("Unhandled clang::CastKind enum");
}
/// isSimpleZero - If emitting this value will obviously just cause a store of
/// zero to memory, return true. This can return false if uncertain, so it just
/// handles simple cases.
static bool isSimpleZero(const Expr *E, CodeGenFunction &CGF) {
E = E->IgnoreParens();
while (auto *CE = dyn_cast<CastExpr>(E)) {
if (!castPreservesZero(CE))
break;
E = CE->getSubExpr()->IgnoreParens();
}
// 0
if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(E))
return IL->getValue() == 0;
// +0.0
if (const FloatingLiteral *FL = dyn_cast<FloatingLiteral>(E))
return FL->getValue().isPosZero();
// int()
if ((isa<ImplicitValueInitExpr>(E) || isa<CXXScalarValueInitExpr>(E)) &&
CGF.getTypes().isZeroInitializable(E->getType()))
return true;
// (int*)0 - Null pointer expressions.
if (const CastExpr *ICE = dyn_cast<CastExpr>(E))
return ICE->getCastKind() == CK_NullToPointer &&
CGF.getTypes().isPointerZeroInitializable(E->getType()) &&
!E->HasSideEffects(CGF.getContext());
// '\0'
if (const CharacterLiteral *CL = dyn_cast<CharacterLiteral>(E))
return CL->getValue() == 0;
// Otherwise, hard case: conservatively return false.
return false;
}
void
AggExprEmitter::EmitInitializationToLValue(Expr *E, LValue LV) {
QualType type = LV.getType();
// FIXME: Ignore result?
// FIXME: Are initializers affected by volatile?
if (Dest.isZeroed() && isSimpleZero(E, CGF)) {
// Storing "i32 0" to a zero'd memory location is a noop.
return;
} else if (isa<ImplicitValueInitExpr>(E) || isa<CXXScalarValueInitExpr>(E)) {
return EmitNullInitializationToLValue(LV);
} else if (isa<NoInitExpr>(E)) {
// Do nothing.
return;
} else if (type->isReferenceType()) {
RValue RV = CGF.EmitReferenceBindingToExpr(E);
return CGF.EmitStoreThroughLValue(RV, LV);
}
switch (CGF.getEvaluationKind(type)) {
case TEK_Complex:
CGF.EmitComplexExprIntoLValue(E, LV, /*isInit*/ true);
return;
case TEK_Aggregate:
CGF.EmitAggExpr(
E, AggValueSlot::forLValue(LV, AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased,
AggValueSlot::MayOverlap, Dest.isZeroed()));
return;
case TEK_Scalar:
if (LV.isSimple()) {
CGF.EmitScalarInit(E, /*D=*/nullptr, LV, /*Captured=*/false);
} else {
CGF.EmitStoreThroughLValue(RValue::get(CGF.EmitScalarExpr(E)), LV);
}
return;
}
llvm_unreachable("bad evaluation kind");
}
void AggExprEmitter::EmitNullInitializationToLValue(LValue lv) {
QualType type = lv.getType();
// If the destination slot is already zeroed out before the aggregate is
// copied into it, we don't have to emit any zeros here.
if (Dest.isZeroed() && CGF.getTypes().isZeroInitializable(type))
return;
if (CGF.hasScalarEvaluationKind(type)) {
// For non-aggregates, we can store the appropriate null constant.
llvm::Value *null = CGF.CGM.EmitNullConstant(type);
// Note that the following is not equivalent to
// EmitStoreThroughBitfieldLValue for ARC types.
if (lv.isBitField()) {
CGF.EmitStoreThroughBitfieldLValue(RValue::get(null), lv);
} else {
assert(lv.isSimple());
CGF.EmitStoreOfScalar(null, lv, /* isInitialization */ true);
}
} else {
// There's a potential optimization opportunity in combining
// memsets; that would be easy for arrays, but relatively
// difficult for structures with the current code.
CGF.EmitNullInitialization(lv.getAddress(), lv.getType());
}
}
void AggExprEmitter::VisitCXXParenListInitExpr(CXXParenListInitExpr *E) {
VisitCXXParenListOrInitListExpr(E, E->getInitExprs(),
E->getInitializedFieldInUnion(),
E->getArrayFiller());
}
void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
if (E->hadArrayRangeDesignator())
CGF.ErrorUnsupported(E, "GNU array range designator extension");
if (E->isTransparent())
return Visit(E->getInit(0));
VisitCXXParenListOrInitListExpr(
E, E->inits(), E->getInitializedFieldInUnion(), E->getArrayFiller());
}
void AggExprEmitter::VisitCXXParenListOrInitListExpr(
Expr *ExprToVisit, ArrayRef<Expr *> InitExprs,
FieldDecl *InitializedFieldInUnion, Expr *ArrayFiller) {
#if 0
// FIXME: Assess perf here? Figure out what cases are worth optimizing here
// (Length of globals? Chunks of zeroed-out space?).
//
// If we can, prefer a copy from a global; this is a lot less code for long
// globals, and it's easier for the current optimizers to analyze.
if (llvm::Constant *C =
CGF.CGM.EmitConstantExpr(ExprToVisit, ExprToVisit->getType(), &CGF)) {
llvm::GlobalVariable* GV =
new llvm::GlobalVariable(CGF.CGM.getModule(), C->getType(), true,
llvm::GlobalValue::InternalLinkage, C, "");
EmitFinalDestCopy(ExprToVisit->getType(),
CGF.MakeAddrLValue(GV, ExprToVisit->getType()));
return;
}
#endif
AggValueSlot Dest = EnsureSlot(ExprToVisit->getType());
LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), ExprToVisit->getType());
// Handle initialization of an array.
if (ExprToVisit->getType()->isConstantArrayType()) {
auto AType = cast<llvm::ArrayType>(Dest.getAddress().getElementType());
EmitArrayInit(Dest.getAddress(), AType, ExprToVisit->getType(), ExprToVisit,
InitExprs, ArrayFiller);
return;
} else if (ExprToVisit->getType()->isVariableArrayType()) {
// A variable array type that has an initializer can only do empty
// initialization. And because this feature is not exposed as an extension
// in C++, we can safely memset the array memory to zero.
assert(InitExprs.size() == 0 &&
"you can only use an empty initializer with VLAs");
CGF.EmitNullInitialization(Dest.getAddress(), ExprToVisit->getType());
return;
}
assert(ExprToVisit->getType()->isRecordType() &&
"Only support structs/unions here!");
// Do struct initialization; this code just sets each individual member
// to the appropriate value. This makes bitfield support automatic;
// the disadvantage is that the generated code is more difficult for
// the optimizer, especially with bitfields.
unsigned NumInitElements = InitExprs.size();
RecordDecl *record = ExprToVisit->getType()->castAs<RecordType>()->getDecl();
// We'll need to enter cleanup scopes in case any of the element
// initializers throws an exception.
SmallVector<EHScopeStack::stable_iterator, 16> cleanups;
CodeGenFunction::CleanupDeactivationScope DeactivateCleanups(CGF);
unsigned curInitIndex = 0;
// Emit initialization of base classes.
if (auto *CXXRD = dyn_cast<CXXRecordDecl>(record)) {
assert(NumInitElements >= CXXRD->getNumBases() &&
"missing initializer for base class");
for (auto &Base : CXXRD->bases()) {
assert(!Base.isVirtual() && "should not see vbases here");
auto *BaseRD = Base.getType()->getAsCXXRecordDecl();
Address V = CGF.GetAddressOfDirectBaseInCompleteClass(
Dest.getAddress(), CXXRD, BaseRD,
/*isBaseVirtual*/ false);
AggValueSlot AggSlot = AggValueSlot::forAddr(
V, Qualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased,
CGF.getOverlapForBaseInit(CXXRD, BaseRD, Base.isVirtual()));
CGF.EmitAggExpr(InitExprs[curInitIndex++], AggSlot);
if (QualType::DestructionKind dtorKind =
Base.getType().isDestructedType())
CGF.pushDestroyAndDeferDeactivation(dtorKind, V, Base.getType());
}
}
// Prepare a 'this' for CXXDefaultInitExprs.
CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress());
if (record->isUnion()) {
// Only initialize one field of a union. The field itself is
// specified by the initializer list.
if (!InitializedFieldInUnion) {
// Empty union; we have nothing to do.
#ifndef NDEBUG
// Make sure that it's really empty and not a failure of
// semantic analysis.
for (const auto *Field : record->fields())
assert(
(Field->isUnnamedBitField() || Field->isAnonymousStructOrUnion()) &&
"Only unnamed bitfields or anonymous class allowed");
#endif
return;
}
// FIXME: volatility
FieldDecl *Field = InitializedFieldInUnion;
LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestLV, Field);
if (NumInitElements) {
// Store the initializer into the field
EmitInitializationToLValue(InitExprs[0], FieldLoc);
} else {
// Default-initialize to null.
EmitNullInitializationToLValue(FieldLoc);
}
return;
}
// Here we iterate over the fields; this makes it simpler to both
// default-initialize fields and skip over unnamed fields.
for (const auto *field : record->fields()) {
// We're done once we hit the flexible array member.
if (field->getType()->isIncompleteArrayType())
break;
// Always skip anonymous bitfields.
if (field->isUnnamedBitField())
continue;
// We're done if we reach the end of the explicit initializers, we
// have a zeroed object, and the rest of the fields are
// zero-initializable.
if (curInitIndex == NumInitElements && Dest.isZeroed() &&
CGF.getTypes().isZeroInitializable(ExprToVisit->getType()))
break;
LValue LV = CGF.EmitLValueForFieldInitialization(DestLV, field);
// We never generate write-barriers for initialized fields.
LV.setNonGC(true);
if (curInitIndex < NumInitElements) {
// Store the initializer into the field.
EmitInitializationToLValue(InitExprs[curInitIndex++], LV);
} else {
// We're out of initializers; default-initialize to null
EmitNullInitializationToLValue(LV);
}
// Push a destructor if necessary.
// FIXME: if we have an array of structures, all explicitly
// initialized, we can end up pushing a linear number of cleanups.
if (QualType::DestructionKind dtorKind
= field->getType().isDestructedType()) {
assert(LV.isSimple());
if (dtorKind) {
CGF.pushDestroyAndDeferDeactivation(NormalAndEHCleanup, LV.getAddress(),
field->getType(),
CGF.getDestroyer(dtorKind), false);
}
}
}
}
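// An ArrayInitLoopExpr initializes each array element from a common
// subexpression, e.g. when a lambda captures an array by value or an
// implicit copy constructor copies an array member.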
void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E,
llvm::Value *outerBegin) {
// Emit the common subexpression.
CodeGenFunction::OpaqueValueMapping binding(CGF, E->getCommonExpr());
Address destPtr = EnsureSlot(E->getType()).getAddress();
uint64_t numElements = E->getArraySize().getZExtValue();
if (!numElements)
return;
// destPtr is an array*. Construct an elementType* by drilling down a level.
llvm::Value *zero = llvm::ConstantInt::get(CGF.SizeTy, 0);
llvm::Value *indices[] = {zero, zero};
llvm::Value *begin = Builder.CreateInBoundsGEP(destPtr.getElementType(),
destPtr.emitRawPointer(CGF),
indices, "arrayinit.begin");
// Prepare to special-case multidimensional array initialization: we avoid
// emitting multiple destructor loops in that case.
if (!outerBegin)
outerBegin = begin;
ArrayInitLoopExpr *InnerLoop = dyn_cast<ArrayInitLoopExpr>(E->getSubExpr());
QualType elementType =
CGF.getContext().getAsArrayType(E->getType())->getElementType();
CharUnits elementSize = CGF.getContext().getTypeSizeInChars(elementType);
CharUnits elementAlign =
destPtr.getAlignment().alignmentOfArrayElement(elementSize);
llvm::Type *llvmElementType = CGF.ConvertTypeForMem(elementType);
llvm::BasicBlock *entryBB = Builder.GetInsertBlock();
llvm::BasicBlock *bodyBB = CGF.createBasicBlock("arrayinit.body");
// Jump into the body.
CGF.EmitBlock(bodyBB);
llvm::PHINode *index =
Builder.CreatePHI(zero->getType(), 2, "arrayinit.index");
index->addIncoming(zero, entryBB);
llvm::Value *element =
Builder.CreateInBoundsGEP(llvmElementType, begin, index);
// Prepare for a cleanup.
QualType::DestructionKind dtorKind = elementType.isDestructedType();
EHScopeStack::stable_iterator cleanup;
if (CGF.needsEHCleanup(dtorKind) && !InnerLoop) {
if (outerBegin->getType() != element->getType())
outerBegin = Builder.CreateBitCast(outerBegin, element->getType());
CGF.pushRegularPartialArrayCleanup(outerBegin, element, elementType,
elementAlign,
CGF.getDestroyer(dtorKind));
cleanup = CGF.EHStack.stable_begin();
} else {
dtorKind = QualType::DK_none;
}
// Emit the actual filler expression.
{
// Temporaries created in an array initialization loop are destroyed
// at the end of each iteration.
CodeGenFunction::RunCleanupsScope CleanupsScope(CGF);
CodeGenFunction::ArrayInitLoopExprScope Scope(CGF, index);
LValue elementLV = CGF.MakeAddrLValue(
Address(element, llvmElementType, elementAlign), elementType);
if (InnerLoop) {
// If the subexpression is an ArrayInitLoopExpr, share its cleanup.
auto elementSlot = AggValueSlot::forLValue(
elementLV, AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased,
AggValueSlot::DoesNotOverlap);
AggExprEmitter(CGF, elementSlot, false)
.VisitArrayInitLoopExpr(InnerLoop, outerBegin);
} else
EmitInitializationToLValue(E->getSubExpr(), elementLV);
}
// Move on to the next element.
llvm::Value *nextIndex = Builder.CreateNUWAdd(
index, llvm::ConstantInt::get(CGF.SizeTy, 1), "arrayinit.next");
index->addIncoming(nextIndex, Builder.GetInsertBlock());
// Leave the loop if we're done.
llvm::Value *done = Builder.CreateICmpEQ(
nextIndex, llvm::ConstantInt::get(CGF.SizeTy, numElements),
"arrayinit.done");
llvm::BasicBlock *endBB = CGF.createBasicBlock("arrayinit.end");
Builder.CreateCondBr(done, endBB, bodyBB);
CGF.EmitBlock(endBB);
// Leave the partial-array cleanup if we entered one.
if (dtorKind)
CGF.DeactivateCleanupBlock(cleanup, index);
}
void AggExprEmitter::VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
AggValueSlot Dest = EnsureSlot(E->getType());
LValue DestLV = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
EmitInitializationToLValue(E->getBase(), DestLV);
VisitInitListExpr(E->getUpdater());
}
//===----------------------------------------------------------------------===//
// Entry Points into this File
//===----------------------------------------------------------------------===//
/// GetNumNonZeroBytesInInit - Get an approximate count of the number of
/// non-zero bytes that will be stored when outputting the initializer for the
/// specified initializer expression.
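/// For example, for 'int a[3] = {0, 0, 5};' this returns 4: only one int's
/// worth of bytes needs a non-zero store. The result feeds the memset
/// heuristic in CheckAggExprForMemSetUse.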
static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) {
if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E))
E = MTE->getSubExpr();
E = E->IgnoreParenNoopCasts(CGF.getContext());
// 0 and 0.0 won't require any non-zero stores!
if (isSimpleZero(E, CGF)) return CharUnits::Zero();
// If this is an initlist expr, sum up the sizes of the (present)
// elements. If this is something weird, assume the whole thing is non-zero.
const InitListExpr *ILE = dyn_cast<InitListExpr>(E);
while (ILE && ILE->isTransparent())
ILE = dyn_cast<InitListExpr>(ILE->getInit(0));
if (!ILE || !CGF.getTypes().isZeroInitializable(ILE->getType()))
return CGF.getContext().getTypeSizeInChars(E->getType());
// InitListExprs for structs have to be handled carefully. If there are
// reference members, we need to consider the size of the reference, not the
// referencee. InitListExprs for unions and arrays can't have references.
if (const RecordType *RT = E->getType()->getAs<RecordType>()) {
if (!RT->isUnionType()) {
RecordDecl *SD = RT->getDecl();
CharUnits NumNonZeroBytes = CharUnits::Zero();
unsigned ILEElement = 0;
if (auto *CXXRD = dyn_cast<CXXRecordDecl>(SD))
while (ILEElement != CXXRD->getNumBases())
NumNonZeroBytes +=
GetNumNonZeroBytesInInit(ILE->getInit(ILEElement++), CGF);
for (const auto *Field : SD->fields()) {
// We're done once we hit the flexible array member or run out of
// InitListExpr elements.
if (Field->getType()->isIncompleteArrayType() ||
ILEElement == ILE->getNumInits())
break;
if (Field->isUnnamedBitField())
continue;
const Expr *E = ILE->getInit(ILEElement++);
// Reference values are always non-null and have the width of a pointer.
if (Field->getType()->isReferenceType())
NumNonZeroBytes += CGF.getContext().toCharUnitsFromBits(
CGF.getTarget().getPointerWidth(LangAS::Default));
else
NumNonZeroBytes += GetNumNonZeroBytesInInit(E, CGF);
}
return NumNonZeroBytes;
}
}
// FIXME: This overestimates the number of non-zero bytes for bit-fields.
CharUnits NumNonZeroBytes = CharUnits::Zero();
for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i)
NumNonZeroBytes += GetNumNonZeroBytesInInit(ILE->getInit(i), CGF);
return NumNonZeroBytes;
}
/// CheckAggExprForMemSetUse - If the initializer is large and has a lot of
/// zeros in it, emit a memset and avoid storing the individual zeros.
///
static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E,
CodeGenFunction &CGF) {
// If the slot is already known to be zeroed, nothing to do. Don't mess with
// volatile stores.
if (Slot.isZeroed() || Slot.isVolatile() || !Slot.getAddress().isValid())
return;
// C++ objects with a user-declared constructor don't need zeroing.
if (CGF.getLangOpts().CPlusPlus)
if (const RecordType *RT = CGF.getContext()
.getBaseElementType(E->getType())->getAs<RecordType>()) {
const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
if (RD->hasUserDeclaredConstructor())
return;
}
// If the type is 16 bytes or smaller, prefer individual stores over memset.
CharUnits Size = Slot.getPreferredSize(CGF.getContext(), E->getType());
if (Size <= CharUnits::fromQuantity(16))
return;
// Check to see if over 3/4 of the initializer is known to be zero. If so,
// we prefer to emit memset + individual stores for the rest.
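// For example, 'struct { int a[100]; } x = {{1, 2}};' is 400 bytes with only
// 8 of them non-zero: one memset of the slot plus two scalar stores beats a
// hundred individual element stores.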
CharUnits NumNonZeroBytes = GetNumNonZeroBytesInInit(E, CGF);
if (NumNonZeroBytes*4 > Size)
return;
// Okay, it seems like a good idea to use an initial memset, emit the call.
llvm::Constant *SizeVal = CGF.Builder.getInt64(Size.getQuantity());
Address Loc = Slot.getAddress().withElementType(CGF.Int8Ty);
CGF.Builder.CreateMemSet(Loc, CGF.Builder.getInt8(0), SizeVal, false);
// Tell the AggExprEmitter that the slot is known zero.
Slot.setZeroed();
}
/// EmitAggExpr - Emit the computation of the specified expression of aggregate
/// type. The result is computed into the given slot. Note that if the slot
/// is ignored, the value of the aggregate expression is not needed.
void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot) {
assert(E && hasAggregateEvaluationKind(E->getType()) &&
"Invalid aggregate expression to emit");
assert((Slot.getAddress().isValid() || Slot.isIgnored()) &&
"slot has bits but no address");
// Optimize the slot if possible.
CheckAggExprForMemSetUse(Slot, E, *this);
AggExprEmitter(*this, Slot, Slot.isIgnored()).Visit(const_cast<Expr*>(E));
}
LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) {
assert(hasAggregateEvaluationKind(E->getType()) && "Invalid argument!");
Address Temp = CreateMemTemp(E->getType());
LValue LV = MakeAddrLValue(Temp, E->getType());
EmitAggExpr(E, AggValueSlot::forLValue(LV, AggValueSlot::IsNotDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased,
AggValueSlot::DoesNotOverlap));
return LV;
}
void CodeGenFunction::EmitAggFinalDestCopy(QualType Type, AggValueSlot Dest,
const LValue &Src,
ExprValueKind SrcKind) {
return AggExprEmitter(*this, Dest, Dest.isIgnored())
.EmitFinalDestCopy(Type, Src, SrcKind);
}
AggValueSlot::Overlap_t
CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) {
if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType())
return AggValueSlot::DoesNotOverlap;
+ // Empty fields can overlap earlier fields.
+ if (FD->getType()->getAsCXXRecordDecl()->isEmpty())
+ return AggValueSlot::MayOverlap;
+
// If the field lies entirely within the enclosing class's nvsize, its tail
// padding cannot overlap any already-initialized object. (The only subobjects
// with greater addresses that might already be initialized are vbases.)
const RecordDecl *ClassRD = FD->getParent();
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(ClassRD);
if (Layout.getFieldOffset(FD->getFieldIndex()) +
getContext().getTypeSize(FD->getType()) <=
(uint64_t)getContext().toBits(Layout.getNonVirtualSize()))
return AggValueSlot::DoesNotOverlap;
// The tail padding may contain values we need to preserve.
return AggValueSlot::MayOverlap;
}
AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit(
const CXXRecordDecl *RD, const CXXRecordDecl *BaseRD, bool IsVirtual) {
// If the most-derived object is a field declared with [[no_unique_address]],
// the tail padding of any virtual base could be reused for other subobjects
// of that field's class.
if (IsVirtual)
return AggValueSlot::MayOverlap;
+ // Empty bases can overlap earlier bases.
+ if (BaseRD->isEmpty())
+ return AggValueSlot::MayOverlap;
+
// If the base class is laid out entirely within the nvsize of the derived
// class, its tail padding cannot yet be initialized, so we can issue
// stores at the full width of the base class.
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
if (Layout.getBaseClassOffset(BaseRD) +
getContext().getASTRecordLayout(BaseRD).getSize() <=
Layout.getNonVirtualSize())
return AggValueSlot::DoesNotOverlap;
// The tail padding may contain values we need to preserve.
return AggValueSlot::MayOverlap;
}
void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty,
AggValueSlot::Overlap_t MayOverlap,
bool isVolatile) {
assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex");
Address DestPtr = Dest.getAddress();
Address SrcPtr = Src.getAddress();
if (getLangOpts().CPlusPlus) {
if (const RecordType *RT = Ty->getAs<RecordType>()) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(RT->getDecl());
assert((Record->hasTrivialCopyConstructor() ||
Record->hasTrivialCopyAssignment() ||
Record->hasTrivialMoveConstructor() ||
Record->hasTrivialMoveAssignment() ||
Record->hasAttr<TrivialABIAttr>() || Record->isUnion()) &&
"Trying to aggregate-copy a type without a trivial copy/move "
"constructor or assignment operator");
// Ignore empty classes in C++.
if (Record->isEmpty())
return;
}
}
if (getLangOpts().CUDAIsDevice) {
if (Ty->isCUDADeviceBuiltinSurfaceType()) {
if (getTargetHooks().emitCUDADeviceBuiltinSurfaceDeviceCopy(*this, Dest,
Src))
return;
} else if (Ty->isCUDADeviceBuiltinTextureType()) {
if (getTargetHooks().emitCUDADeviceBuiltinTextureDeviceCopy(*this, Dest,
Src))
return;
}
}
// Aggregate assignment turns into llvm.memcpy. This is almost valid per
// C99 6.5.16.1p3, which states "If the value being stored in an object is
// read from another object that overlaps in any way the storage of the first
// object, then the overlap shall be exact and the two objects shall have
// qualified or unqualified versions of a compatible type."
//
// memcpy is not defined if the source and destination pointers are exactly
// equal, but other compilers do this optimization, and almost every memcpy
// implementation handles this case safely. If there is a libc that does not
// safely handle this, we can add a target hook.
// Get data size info for this aggregate. Don't copy the tail padding if this
// might be a potentially-overlapping subobject, since the tail padding might
// be occupied by a different object. Otherwise, copying it is fine.
TypeInfoChars TypeInfo;
if (MayOverlap)
TypeInfo = getContext().getTypeInfoDataSizeInChars(Ty);
else
TypeInfo = getContext().getTypeInfoInChars(Ty);
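// For example, on a typical ABI 'struct B { int i; char c; };' has size 8
// but data size 5; when B is a potentially-overlapping subobject (e.g. a
// [[no_unique_address]] member), another object may live in its tail
// padding, so only the data size is copied.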
llvm::Value *SizeVal = nullptr;
if (TypeInfo.Width.isZero()) {
// But note that getTypeInfo returns 0 for a VLA.
if (auto *VAT = dyn_cast_or_null<VariableArrayType>(
getContext().getAsArrayType(Ty))) {
QualType BaseEltTy;
SizeVal = emitArrayLength(VAT, BaseEltTy, DestPtr);
TypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
assert(!TypeInfo.Width.isZero());
SizeVal = Builder.CreateNUWMul(
SizeVal,
llvm::ConstantInt::get(SizeTy, TypeInfo.Width.getQuantity()));
}
}
if (!SizeVal) {
SizeVal = llvm::ConstantInt::get(SizeTy, TypeInfo.Width.getQuantity());
}
// FIXME: If we have a volatile struct, the optimizer can remove what might
// appear to be `extra' memory ops:
//
// volatile struct { int i; } a, b;
//
// int main() {
// a = b;
// a = b;
// }
//
// we need to use a different call here. We use isVolatile to indicate when
// either the source or the destination is volatile.
DestPtr = DestPtr.withElementType(Int8Ty);
SrcPtr = SrcPtr.withElementType(Int8Ty);
// Don't do any of the memmove_collectable tests if GC isn't set.
if (CGM.getLangOpts().getGC() == LangOptions::NonGC) {
// fall through
} else if (const RecordType *RecordTy = Ty->getAs<RecordType>()) {
RecordDecl *Record = RecordTy->getDecl();
if (Record->hasObjectMember()) {
CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, DestPtr, SrcPtr,
SizeVal);
return;
}
} else if (Ty->isArrayType()) {
QualType BaseType = getContext().getBaseElementType(Ty);
if (const RecordType *RecordTy = BaseType->getAs<RecordType>()) {
if (RecordTy->getDecl()->hasObjectMember()) {
CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, DestPtr, SrcPtr,
SizeVal);
return;
}
}
}
auto Inst = Builder.CreateMemCpy(DestPtr, SrcPtr, SizeVal, isVolatile);
// Determine the metadata to describe the position of any padding in this
// memcpy, as well as the TBAA tags for the members of the struct, in case
// the optimizer wishes to expand it into scalar memory operations.
if (llvm::MDNode *TBAAStructTag = CGM.getTBAAStructInfo(Ty))
Inst->setMetadata(llvm::LLVMContext::MD_tbaa_struct, TBAAStructTag);
if (CGM.getCodeGenOpts().NewStructPathTBAA) {
TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForMemoryTransfer(
Dest.getTBAAInfo(), Src.getTBAAInfo());
CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo);
}
}
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp
index aa97f685ac7a..2f466602d2f6 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp
@@ -1,3268 +1,3268 @@
//===--- CGStmt.cpp - Emit LLVM Code from Statements ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Stmt nodes as LLVM code.
//
//===----------------------------------------------------------------------===//
#include "CGDebugInfo.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Expr.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/SaveAndRestore.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
//===----------------------------------------------------------------------===//
// Statement Emission
//===----------------------------------------------------------------------===//
namespace llvm {
extern cl::opt<bool> EnableSingleByteCoverage;
} // namespace llvm
void CodeGenFunction::EmitStopPoint(const Stmt *S) {
if (CGDebugInfo *DI = getDebugInfo()) {
SourceLocation Loc;
Loc = S->getBeginLoc();
DI->EmitLocation(Builder, Loc);
LastStopPoint = Loc;
}
}
void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
assert(S && "Null statement?");
PGO.setCurrentStmt(S);
// These statements have their own debug info handling.
if (EmitSimpleStmt(S, Attrs))
return;
// Check if we are generating unreachable code.
if (!HaveInsertPoint()) {
// If so, and the statement doesn't contain a label, then we do not need to
// generate actual code. This is safe because (1) the current point is
// unreachable, so we don't need to execute the code, and (2) we've already
// handled the statements which update internal data structures (like the
// local variable map) which could be used by subsequent statements.
if (!ContainsLabel(S)) {
// Verify that any decl statements were handled as simple; they may be in
// scope of subsequent reachable statements.
assert(!isa<DeclStmt>(*S) && "Unexpected DeclStmt!");
return;
}
// Otherwise, make a new block to hold the code.
EnsureInsertPoint();
}
// Generate a stoppoint if we are emitting debug info.
EmitStopPoint(S);
// Ignore all OpenMP directives except for simd if OpenMP with Simd is
// enabled.
if (getLangOpts().OpenMP && getLangOpts().OpenMPSimd) {
if (const auto *D = dyn_cast<OMPExecutableDirective>(S)) {
EmitSimpleOMPExecutableDirective(*D);
return;
}
}
switch (S->getStmtClass()) {
case Stmt::NoStmtClass:
case Stmt::CXXCatchStmtClass:
case Stmt::SEHExceptStmtClass:
case Stmt::SEHFinallyStmtClass:
case Stmt::MSDependentExistsStmtClass:
llvm_unreachable("invalid statement class to emit generically");
case Stmt::NullStmtClass:
case Stmt::CompoundStmtClass:
case Stmt::DeclStmtClass:
case Stmt::LabelStmtClass:
case Stmt::AttributedStmtClass:
case Stmt::GotoStmtClass:
case Stmt::BreakStmtClass:
case Stmt::ContinueStmtClass:
case Stmt::DefaultStmtClass:
case Stmt::CaseStmtClass:
case Stmt::SEHLeaveStmtClass:
llvm_unreachable("should have emitted these statements as simple");
#define STMT(Type, Base)
#define ABSTRACT_STMT(Op)
#define EXPR(Type, Base) \
case Stmt::Type##Class:
#include "clang/AST/StmtNodes.inc"
{
// Remember the block we came in on.
llvm::BasicBlock *incoming = Builder.GetInsertBlock();
assert(incoming && "expression emission must have an insertion point");
EmitIgnoredExpr(cast<Expr>(S));
llvm::BasicBlock *outgoing = Builder.GetInsertBlock();
assert(outgoing && "expression emission cleared block!");
// The expression emitters assume (reasonably!) that the insertion
// point is always set. To maintain that, the call-emission code
// for noreturn functions has to enter a new block with no
// predecessors. We want to kill that block and mark the current
// insertion point unreachable in the common case of a call like
// "exit();". Since expression emission doesn't otherwise create
// blocks with no predecessors, we can just test for that.
// However, we must be careful not to do this to our incoming
// block, because *statement* emission does sometimes create
// reachable blocks which will have no predecessors until later in
// the function. This occurs with, e.g., labels that are not
// reachable by fallthrough.
if (incoming != outgoing && outgoing->use_empty()) {
outgoing->eraseFromParent();
Builder.ClearInsertionPoint();
}
break;
}
case Stmt::IndirectGotoStmtClass:
EmitIndirectGotoStmt(cast<IndirectGotoStmt>(*S)); break;
case Stmt::IfStmtClass: EmitIfStmt(cast<IfStmt>(*S)); break;
case Stmt::WhileStmtClass: EmitWhileStmt(cast<WhileStmt>(*S), Attrs); break;
case Stmt::DoStmtClass: EmitDoStmt(cast<DoStmt>(*S), Attrs); break;
case Stmt::ForStmtClass: EmitForStmt(cast<ForStmt>(*S), Attrs); break;
case Stmt::ReturnStmtClass: EmitReturnStmt(cast<ReturnStmt>(*S)); break;
case Stmt::SwitchStmtClass: EmitSwitchStmt(cast<SwitchStmt>(*S)); break;
case Stmt::GCCAsmStmtClass: // Intentional fall-through.
case Stmt::MSAsmStmtClass: EmitAsmStmt(cast<AsmStmt>(*S)); break;
case Stmt::CoroutineBodyStmtClass:
EmitCoroutineBody(cast<CoroutineBodyStmt>(*S));
break;
case Stmt::CoreturnStmtClass:
EmitCoreturnStmt(cast<CoreturnStmt>(*S));
break;
case Stmt::CapturedStmtClass: {
const CapturedStmt *CS = cast<CapturedStmt>(S);
EmitCapturedStmt(*CS, CS->getCapturedRegionKind());
}
break;
case Stmt::ObjCAtTryStmtClass:
EmitObjCAtTryStmt(cast<ObjCAtTryStmt>(*S));
break;
case Stmt::ObjCAtCatchStmtClass:
llvm_unreachable(
"@catch statements should be handled by EmitObjCAtTryStmt");
case Stmt::ObjCAtFinallyStmtClass:
llvm_unreachable(
"@finally statements should be handled by EmitObjCAtTryStmt");
case Stmt::ObjCAtThrowStmtClass:
EmitObjCAtThrowStmt(cast<ObjCAtThrowStmt>(*S));
break;
case Stmt::ObjCAtSynchronizedStmtClass:
EmitObjCAtSynchronizedStmt(cast<ObjCAtSynchronizedStmt>(*S));
break;
case Stmt::ObjCForCollectionStmtClass:
EmitObjCForCollectionStmt(cast<ObjCForCollectionStmt>(*S));
break;
case Stmt::ObjCAutoreleasePoolStmtClass:
EmitObjCAutoreleasePoolStmt(cast<ObjCAutoreleasePoolStmt>(*S));
break;
case Stmt::CXXTryStmtClass:
EmitCXXTryStmt(cast<CXXTryStmt>(*S));
break;
case Stmt::CXXForRangeStmtClass:
EmitCXXForRangeStmt(cast<CXXForRangeStmt>(*S), Attrs);
break;
case Stmt::SEHTryStmtClass:
EmitSEHTryStmt(cast<SEHTryStmt>(*S));
break;
case Stmt::OMPMetaDirectiveClass:
EmitOMPMetaDirective(cast<OMPMetaDirective>(*S));
break;
case Stmt::OMPCanonicalLoopClass:
EmitOMPCanonicalLoop(cast<OMPCanonicalLoop>(S));
break;
case Stmt::OMPParallelDirectiveClass:
EmitOMPParallelDirective(cast<OMPParallelDirective>(*S));
break;
case Stmt::OMPSimdDirectiveClass:
EmitOMPSimdDirective(cast<OMPSimdDirective>(*S));
break;
case Stmt::OMPTileDirectiveClass:
EmitOMPTileDirective(cast<OMPTileDirective>(*S));
break;
case Stmt::OMPUnrollDirectiveClass:
EmitOMPUnrollDirective(cast<OMPUnrollDirective>(*S));
break;
case Stmt::OMPReverseDirectiveClass:
EmitOMPReverseDirective(cast<OMPReverseDirective>(*S));
break;
case Stmt::OMPInterchangeDirectiveClass:
EmitOMPInterchangeDirective(cast<OMPInterchangeDirective>(*S));
break;
case Stmt::OMPForDirectiveClass:
EmitOMPForDirective(cast<OMPForDirective>(*S));
break;
case Stmt::OMPForSimdDirectiveClass:
EmitOMPForSimdDirective(cast<OMPForSimdDirective>(*S));
break;
case Stmt::OMPSectionsDirectiveClass:
EmitOMPSectionsDirective(cast<OMPSectionsDirective>(*S));
break;
case Stmt::OMPSectionDirectiveClass:
EmitOMPSectionDirective(cast<OMPSectionDirective>(*S));
break;
case Stmt::OMPSingleDirectiveClass:
EmitOMPSingleDirective(cast<OMPSingleDirective>(*S));
break;
case Stmt::OMPMasterDirectiveClass:
EmitOMPMasterDirective(cast<OMPMasterDirective>(*S));
break;
case Stmt::OMPCriticalDirectiveClass:
EmitOMPCriticalDirective(cast<OMPCriticalDirective>(*S));
break;
case Stmt::OMPParallelForDirectiveClass:
EmitOMPParallelForDirective(cast<OMPParallelForDirective>(*S));
break;
case Stmt::OMPParallelForSimdDirectiveClass:
EmitOMPParallelForSimdDirective(cast<OMPParallelForSimdDirective>(*S));
break;
case Stmt::OMPParallelMasterDirectiveClass:
EmitOMPParallelMasterDirective(cast<OMPParallelMasterDirective>(*S));
break;
case Stmt::OMPParallelSectionsDirectiveClass:
EmitOMPParallelSectionsDirective(cast<OMPParallelSectionsDirective>(*S));
break;
case Stmt::OMPTaskDirectiveClass:
EmitOMPTaskDirective(cast<OMPTaskDirective>(*S));
break;
case Stmt::OMPTaskyieldDirectiveClass:
EmitOMPTaskyieldDirective(cast<OMPTaskyieldDirective>(*S));
break;
case Stmt::OMPErrorDirectiveClass:
EmitOMPErrorDirective(cast<OMPErrorDirective>(*S));
break;
case Stmt::OMPBarrierDirectiveClass:
EmitOMPBarrierDirective(cast<OMPBarrierDirective>(*S));
break;
case Stmt::OMPTaskwaitDirectiveClass:
EmitOMPTaskwaitDirective(cast<OMPTaskwaitDirective>(*S));
break;
case Stmt::OMPTaskgroupDirectiveClass:
EmitOMPTaskgroupDirective(cast<OMPTaskgroupDirective>(*S));
break;
case Stmt::OMPFlushDirectiveClass:
EmitOMPFlushDirective(cast<OMPFlushDirective>(*S));
break;
case Stmt::OMPDepobjDirectiveClass:
EmitOMPDepobjDirective(cast<OMPDepobjDirective>(*S));
break;
case Stmt::OMPScanDirectiveClass:
EmitOMPScanDirective(cast<OMPScanDirective>(*S));
break;
case Stmt::OMPOrderedDirectiveClass:
EmitOMPOrderedDirective(cast<OMPOrderedDirective>(*S));
break;
case Stmt::OMPAtomicDirectiveClass:
EmitOMPAtomicDirective(cast<OMPAtomicDirective>(*S));
break;
case Stmt::OMPTargetDirectiveClass:
EmitOMPTargetDirective(cast<OMPTargetDirective>(*S));
break;
case Stmt::OMPTeamsDirectiveClass:
EmitOMPTeamsDirective(cast<OMPTeamsDirective>(*S));
break;
case Stmt::OMPCancellationPointDirectiveClass:
EmitOMPCancellationPointDirective(cast<OMPCancellationPointDirective>(*S));
break;
case Stmt::OMPCancelDirectiveClass:
EmitOMPCancelDirective(cast<OMPCancelDirective>(*S));
break;
case Stmt::OMPTargetDataDirectiveClass:
EmitOMPTargetDataDirective(cast<OMPTargetDataDirective>(*S));
break;
case Stmt::OMPTargetEnterDataDirectiveClass:
EmitOMPTargetEnterDataDirective(cast<OMPTargetEnterDataDirective>(*S));
break;
case Stmt::OMPTargetExitDataDirectiveClass:
EmitOMPTargetExitDataDirective(cast<OMPTargetExitDataDirective>(*S));
break;
case Stmt::OMPTargetParallelDirectiveClass:
EmitOMPTargetParallelDirective(cast<OMPTargetParallelDirective>(*S));
break;
case Stmt::OMPTargetParallelForDirectiveClass:
EmitOMPTargetParallelForDirective(cast<OMPTargetParallelForDirective>(*S));
break;
case Stmt::OMPTaskLoopDirectiveClass:
EmitOMPTaskLoopDirective(cast<OMPTaskLoopDirective>(*S));
break;
case Stmt::OMPTaskLoopSimdDirectiveClass:
EmitOMPTaskLoopSimdDirective(cast<OMPTaskLoopSimdDirective>(*S));
break;
case Stmt::OMPMasterTaskLoopDirectiveClass:
EmitOMPMasterTaskLoopDirective(cast<OMPMasterTaskLoopDirective>(*S));
break;
case Stmt::OMPMaskedTaskLoopDirectiveClass:
llvm_unreachable("masked taskloop directive not supported yet.");
break;
case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
EmitOMPMasterTaskLoopSimdDirective(
cast<OMPMasterTaskLoopSimdDirective>(*S));
break;
case Stmt::OMPMaskedTaskLoopSimdDirectiveClass:
llvm_unreachable("masked taskloop simd directive not supported yet.");
break;
case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
EmitOMPParallelMasterTaskLoopDirective(
cast<OMPParallelMasterTaskLoopDirective>(*S));
break;
case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
llvm_unreachable("parallel masked taskloop directive not supported yet.");
break;
case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
EmitOMPParallelMasterTaskLoopSimdDirective(
cast<OMPParallelMasterTaskLoopSimdDirective>(*S));
break;
case Stmt::OMPParallelMaskedTaskLoopSimdDirectiveClass:
llvm_unreachable(
"parallel masked taskloop simd directive not supported yet.");
break;
case Stmt::OMPDistributeDirectiveClass:
EmitOMPDistributeDirective(cast<OMPDistributeDirective>(*S));
break;
case Stmt::OMPTargetUpdateDirectiveClass:
EmitOMPTargetUpdateDirective(cast<OMPTargetUpdateDirective>(*S));
break;
case Stmt::OMPDistributeParallelForDirectiveClass:
EmitOMPDistributeParallelForDirective(
cast<OMPDistributeParallelForDirective>(*S));
break;
case Stmt::OMPDistributeParallelForSimdDirectiveClass:
EmitOMPDistributeParallelForSimdDirective(
cast<OMPDistributeParallelForSimdDirective>(*S));
break;
case Stmt::OMPDistributeSimdDirectiveClass:
EmitOMPDistributeSimdDirective(cast<OMPDistributeSimdDirective>(*S));
break;
case Stmt::OMPTargetParallelForSimdDirectiveClass:
EmitOMPTargetParallelForSimdDirective(
cast<OMPTargetParallelForSimdDirective>(*S));
break;
case Stmt::OMPTargetSimdDirectiveClass:
EmitOMPTargetSimdDirective(cast<OMPTargetSimdDirective>(*S));
break;
case Stmt::OMPTeamsDistributeDirectiveClass:
EmitOMPTeamsDistributeDirective(cast<OMPTeamsDistributeDirective>(*S));
break;
case Stmt::OMPTeamsDistributeSimdDirectiveClass:
EmitOMPTeamsDistributeSimdDirective(
cast<OMPTeamsDistributeSimdDirective>(*S));
break;
case Stmt::OMPTeamsDistributeParallelForSimdDirectiveClass:
EmitOMPTeamsDistributeParallelForSimdDirective(
cast<OMPTeamsDistributeParallelForSimdDirective>(*S));
break;
case Stmt::OMPTeamsDistributeParallelForDirectiveClass:
EmitOMPTeamsDistributeParallelForDirective(
cast<OMPTeamsDistributeParallelForDirective>(*S));
break;
case Stmt::OMPTargetTeamsDirectiveClass:
EmitOMPTargetTeamsDirective(cast<OMPTargetTeamsDirective>(*S));
break;
case Stmt::OMPTargetTeamsDistributeDirectiveClass:
EmitOMPTargetTeamsDistributeDirective(
cast<OMPTargetTeamsDistributeDirective>(*S));
break;
case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass:
EmitOMPTargetTeamsDistributeParallelForDirective(
cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
break;
case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
EmitOMPTargetTeamsDistributeParallelForSimdDirective(
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
break;
case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
EmitOMPTargetTeamsDistributeSimdDirective(
cast<OMPTargetTeamsDistributeSimdDirective>(*S));
break;
case Stmt::OMPInteropDirectiveClass:
EmitOMPInteropDirective(cast<OMPInteropDirective>(*S));
break;
case Stmt::OMPDispatchDirectiveClass:
CGM.ErrorUnsupported(S, "OpenMP dispatch directive");
break;
case Stmt::OMPScopeDirectiveClass:
CGM.ErrorUnsupported(S, "scope with FE outlining");
break;
case Stmt::OMPMaskedDirectiveClass:
EmitOMPMaskedDirective(cast<OMPMaskedDirective>(*S));
break;
case Stmt::OMPGenericLoopDirectiveClass:
EmitOMPGenericLoopDirective(cast<OMPGenericLoopDirective>(*S));
break;
case Stmt::OMPTeamsGenericLoopDirectiveClass:
EmitOMPTeamsGenericLoopDirective(cast<OMPTeamsGenericLoopDirective>(*S));
break;
case Stmt::OMPTargetTeamsGenericLoopDirectiveClass:
EmitOMPTargetTeamsGenericLoopDirective(
cast<OMPTargetTeamsGenericLoopDirective>(*S));
break;
case Stmt::OMPParallelGenericLoopDirectiveClass:
EmitOMPParallelGenericLoopDirective(
cast<OMPParallelGenericLoopDirective>(*S));
break;
case Stmt::OMPTargetParallelGenericLoopDirectiveClass:
EmitOMPTargetParallelGenericLoopDirective(
cast<OMPTargetParallelGenericLoopDirective>(*S));
break;
case Stmt::OMPParallelMaskedDirectiveClass:
EmitOMPParallelMaskedDirective(cast<OMPParallelMaskedDirective>(*S));
break;
case Stmt::OpenACCComputeConstructClass:
EmitOpenACCComputeConstruct(cast<OpenACCComputeConstruct>(*S));
break;
case Stmt::OpenACCLoopConstructClass:
EmitOpenACCLoopConstruct(cast<OpenACCLoopConstruct>(*S));
break;
}
}
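// Illustrative sketch (editorial, hypothetical C input) of the dead-block
// pruning described in the expression case above: a call to a noreturn
// function leaves the builder in a fresh, predecessor-less block, which is
// erased and the insertion point cleared.
//
//   void f(int x) {
//     exit(x);   // noreturn call: emission continues in an unreachable block
//     x = 3;     // no insert point and no label, so nothing is emitted
//   }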
bool CodeGenFunction::EmitSimpleStmt(const Stmt *S,
ArrayRef<const Attr *> Attrs) {
switch (S->getStmtClass()) {
default:
return false;
case Stmt::NullStmtClass:
break;
case Stmt::CompoundStmtClass:
EmitCompoundStmt(cast<CompoundStmt>(*S));
break;
case Stmt::DeclStmtClass:
EmitDeclStmt(cast<DeclStmt>(*S));
break;
case Stmt::LabelStmtClass:
EmitLabelStmt(cast<LabelStmt>(*S));
break;
case Stmt::AttributedStmtClass:
EmitAttributedStmt(cast<AttributedStmt>(*S));
break;
case Stmt::GotoStmtClass:
EmitGotoStmt(cast<GotoStmt>(*S));
break;
case Stmt::BreakStmtClass:
EmitBreakStmt(cast<BreakStmt>(*S));
break;
case Stmt::ContinueStmtClass:
EmitContinueStmt(cast<ContinueStmt>(*S));
break;
case Stmt::DefaultStmtClass:
EmitDefaultStmt(cast<DefaultStmt>(*S), Attrs);
break;
case Stmt::CaseStmtClass:
EmitCaseStmt(cast<CaseStmt>(*S), Attrs);
break;
case Stmt::SEHLeaveStmtClass:
EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S));
break;
}
return true;
}
/// EmitCompoundStmt - Emit a compound statement {..} node. If GetLast is true,
/// this captures the expression result of the last sub-statement and returns it
/// (for use by the statement expression extension).
Address CodeGenFunction::EmitCompoundStmt(const CompoundStmt &S, bool GetLast,
AggValueSlot AggSlot) {
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),S.getLBracLoc(),
"LLVM IR generation of compound statement ('{}')");
// Keep track of the current cleanup stack depth, including debug scopes.
LexicalScope Scope(*this, S.getSourceRange());
return EmitCompoundStmtWithoutScope(S, GetLast, AggSlot);
}
Address
CodeGenFunction::EmitCompoundStmtWithoutScope(const CompoundStmt &S,
bool GetLast,
AggValueSlot AggSlot) {
const Stmt *ExprResult = S.getStmtExprResult();
assert((!GetLast || ExprResult) &&
"If GetLast is true then the CompoundStmt must have a StmtExprResult");
Address RetAlloca = Address::invalid();
for (auto *CurStmt : S.body()) {
if (GetLast && ExprResult == CurStmt) {
// We have to special case labels here. They are statements, but when put
// at the end of a statement expression, they yield the value of their
// subexpression. Handle this by walking through all labels we encounter,
// emitting them before we evaluate the subexpr.
// Similar issues arise for attributed statements.
while (!isa<Expr>(ExprResult)) {
if (const auto *LS = dyn_cast<LabelStmt>(ExprResult)) {
EmitLabel(LS->getDecl());
ExprResult = LS->getSubStmt();
} else if (const auto *AS = dyn_cast<AttributedStmt>(ExprResult)) {
// FIXME: Update this if we ever have attributes that affect the
// semantics of an expression.
ExprResult = AS->getSubStmt();
} else {
llvm_unreachable("unknown value statement");
}
}
EnsureInsertPoint();
const Expr *E = cast<Expr>(ExprResult);
QualType ExprTy = E->getType();
if (hasAggregateEvaluationKind(ExprTy)) {
EmitAggExpr(E, AggSlot);
} else {
// We can't return an RValue here because there might be cleanups at
// the end of the StmtExpr. Because of that, we have to emit the result
// here into a temporary alloca.
RetAlloca = CreateMemTemp(ExprTy);
EmitAnyExprToMem(E, RetAlloca, Qualifiers(),
/*IsInit*/ false);
}
} else {
EmitStmt(CurStmt);
}
}
return RetAlloca;
}
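// Illustrative example (editorial; GNU statement-expression input) of the
// label-walking loop above: a trailing label wraps the value-yielding
// subexpression and is emitted before that subexpression is evaluated.
//
//   int g(void) {
//     return ({ goto done; done: 42; });   // LabelStmt around the result
//   }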
void CodeGenFunction::SimplifyForwardingBlocks(llvm::BasicBlock *BB) {
llvm::BranchInst *BI = dyn_cast<llvm::BranchInst>(BB->getTerminator());
// If there is a cleanup stack, then it isn't worth trying to
// simplify this block (we would need to remove it from the scope map
// and cleanup entry).
if (!EHStack.empty())
return;
// Can only simplify direct branches.
if (!BI || !BI->isUnconditional())
return;
// Can only simplify empty blocks.
if (BI->getIterator() != BB->begin())
return;
BB->replaceAllUsesWith(BI->getSuccessor(0));
BI->eraseFromParent();
BB->eraseFromParent();
}
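// Illustrative sketch (editorial; LLVM IR shape, assuming no cleanups) of
// what SimplifyForwardingBlocks erases: an otherwise-empty block whose only
// instruction is an unconditional branch is folded into its successor.
//
//   while.cond:                 ; empty forwarding block
//     br label %while.end
//
// All uses of %while.cond are redirected to %while.end, then the branch and
// the block itself are deleted.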
void CodeGenFunction::EmitBlock(llvm::BasicBlock *BB, bool IsFinished) {
llvm::BasicBlock *CurBB = Builder.GetInsertBlock();
// Fall out of the current block (if necessary).
EmitBranch(BB);
if (IsFinished && BB->use_empty()) {
delete BB;
return;
}
// Place the block after the current block, if possible, or else at
// the end of the function.
if (CurBB && CurBB->getParent())
CurFn->insert(std::next(CurBB->getIterator()), BB);
else
CurFn->insert(CurFn->end(), BB);
Builder.SetInsertPoint(BB);
}
void CodeGenFunction::EmitBranch(llvm::BasicBlock *Target) {
// Emit a branch from the current block to the target one if this
// was a real block. If this was just a fall-through block after a
// terminator, don't emit it.
llvm::BasicBlock *CurBB = Builder.GetInsertBlock();
if (!CurBB || CurBB->getTerminator()) {
// If there is no insert point or the previous block is already
// terminated, don't touch it.
} else {
// Otherwise, create a fall-through branch.
Builder.CreateBr(Target);
}
Builder.ClearInsertionPoint();
}
void CodeGenFunction::EmitBlockAfterUses(llvm::BasicBlock *block) {
bool inserted = false;
for (llvm::User *u : block->users()) {
if (llvm::Instruction *insn = dyn_cast<llvm::Instruction>(u)) {
CurFn->insert(std::next(insn->getParent()->getIterator()), block);
inserted = true;
break;
}
}
if (!inserted)
CurFn->insert(CurFn->end(), block);
Builder.SetInsertPoint(block);
}
CodeGenFunction::JumpDest
CodeGenFunction::getJumpDestForLabel(const LabelDecl *D) {
JumpDest &Dest = LabelMap[D];
if (Dest.isValid()) return Dest;
// Create, but don't insert, the new block.
Dest = JumpDest(createBasicBlock(D->getName()),
EHScopeStack::stable_iterator::invalid(),
NextCleanupDestIndex++);
return Dest;
}
void CodeGenFunction::EmitLabel(const LabelDecl *D) {
// Add this label to the current lexical scope if we're within any
// normal cleanups. Jumps "in" to this label --- when permitted by
// the language --- may need to be routed around such cleanups.
if (EHStack.hasNormalCleanups() && CurLexicalScope)
CurLexicalScope->addLabel(D);
JumpDest &Dest = LabelMap[D];
// If we didn't need a forward reference to this label, just go
// ahead and create a destination at the current scope.
if (!Dest.isValid()) {
Dest = getJumpDestInCurrentScope(D->getName());
// Otherwise, we need to give this label a target depth and remove
// it from the branch-fixups list.
} else {
assert(!Dest.getScopeDepth().isValid() && "already emitted label!");
Dest.setScopeDepth(EHStack.stable_begin());
ResolveBranchFixups(Dest.getBlock());
}
EmitBlock(Dest.getBlock());
// Emit debug info for labels.
if (CGDebugInfo *DI = getDebugInfo()) {
if (CGM.getCodeGenOpts().hasReducedDebugInfo()) {
DI->setLocation(D->getLocation());
DI->EmitLabel(D, Builder);
}
}
incrementProfileCounter(D->getStmt());
}
/// Change the cleanup scope of the labels in this lexical scope to
/// match the scope of the enclosing context.
void CodeGenFunction::LexicalScope::rescopeLabels() {
assert(!Labels.empty());
EHScopeStack::stable_iterator innermostScope
= CGF.EHStack.getInnermostNormalCleanup();
// Change the scope depth of all the labels.
for (SmallVectorImpl<const LabelDecl*>::const_iterator
i = Labels.begin(), e = Labels.end(); i != e; ++i) {
assert(CGF.LabelMap.count(*i));
JumpDest &dest = CGF.LabelMap.find(*i)->second;
assert(dest.getScopeDepth().isValid());
assert(innermostScope.encloses(dest.getScopeDepth()));
dest.setScopeDepth(innermostScope);
}
// Reparent the labels if the new scope also has cleanups.
if (innermostScope != EHScopeStack::stable_end() && ParentScope) {
ParentScope->Labels.append(Labels.begin(), Labels.end());
}
}
void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) {
EmitLabel(S.getDecl());
// IsEHa - emit eha.scope.begin if it's a side entry of a scope
if (getLangOpts().EHAsynch && S.isSideEntry())
EmitSehCppScopeBegin();
EmitStmt(S.getSubStmt());
}
void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
bool nomerge = false;
bool noinline = false;
bool alwaysinline = false;
const CallExpr *musttail = nullptr;
for (const auto *A : S.getAttrs()) {
switch (A->getKind()) {
default:
break;
case attr::NoMerge:
nomerge = true;
break;
case attr::NoInline:
noinline = true;
break;
case attr::AlwaysInline:
alwaysinline = true;
break;
case attr::MustTail: {
const Stmt *Sub = S.getSubStmt();
const ReturnStmt *R = cast<ReturnStmt>(Sub);
musttail = cast<CallExpr>(R->getRetValue()->IgnoreParens());
} break;
case attr::CXXAssume: {
const Expr *Assumption = cast<CXXAssumeAttr>(A)->getAssumption();
- if (getLangOpts().CXXAssumptions &&
+ if (getLangOpts().CXXAssumptions && Builder.GetInsertBlock() &&
!Assumption->HasSideEffects(getContext())) {
llvm::Value *AssumptionVal = EvaluateExprAsBool(Assumption);
Builder.CreateAssumption(AssumptionVal);
}
} break;
}
}
SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge);
SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline);
SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline);
SaveAndRestore save_musttail(MustTailCall, musttail);
EmitStmt(S.getSubStmt(), S.getAttrs());
}
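// Illustrative example (editorial; standalone C++ input) of attributed
// statements recognized above:
//
//   int fib(int n, int a, int b) {
//     [[assume(n >= 0)]];                                // attr::CXXAssume
//     if (n == 0) return a;
//     [[clang::musttail]] return fib(n - 1, b, a + b);   // attr::MustTail
//   }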
void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) {
// If this code is reachable then emit a stop point (if generating
// debug info). We have to do this ourselves because we are on the
// "simple" statement path.
if (HaveInsertPoint())
EmitStopPoint(&S);
EmitBranchThroughCleanup(getJumpDestForLabel(S.getLabel()));
}
void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
if (const LabelDecl *Target = S.getConstantTarget()) {
EmitBranchThroughCleanup(getJumpDestForLabel(Target));
return;
}
// Ensure that we have an i8* for our PHI node.
llvm::Value *V = Builder.CreateBitCast(EmitScalarExpr(S.getTarget()),
Int8PtrTy, "addr");
llvm::BasicBlock *CurBB = Builder.GetInsertBlock();
// Get the basic block for the indirect goto.
llvm::BasicBlock *IndGotoBB = GetIndirectGotoBlock();
// The first instruction in the block has to be the PHI for the switch dest;
// add an entry for this branch.
cast<llvm::PHINode>(IndGotoBB->begin())->addIncoming(V, CurBB);
EmitBranch(IndGotoBB);
}
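// Illustrative example (editorial; GNU computed-goto input) of what the
// function above lowers: each "goto *expr" adds an incoming value to the
// shared PHI in the indirect-goto block.
//
//   void dispatch(int op) {
//     void *tbl[] = { &&add, &&sub };
//     goto *tbl[op];                     // IndirectGotoStmt
//   add: return;
//   sub: return;
//   }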
void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
// The else branch of a consteval if statement is always the only branch that
// can be runtime evaluated.
if (S.isConsteval()) {
const Stmt *Executed = S.isNegatedConsteval() ? S.getThen() : S.getElse();
if (Executed) {
RunCleanupsScope ExecutedScope(*this);
EmitStmt(Executed);
}
return;
}
// C99 6.8.4.1: The first substatement is executed if the expression compares
// unequal to 0. The condition must be a scalar type.
LexicalScope ConditionScope(*this, S.getCond()->getSourceRange());
if (S.getInit())
EmitStmt(S.getInit());
if (S.getConditionVariable())
EmitDecl(*S.getConditionVariable());
// If the condition constant folds and can be elided, try to avoid emitting
// the condition and the dead arm of the if/else.
bool CondConstant;
if (ConstantFoldsToSimpleInteger(S.getCond(), CondConstant,
S.isConstexpr())) {
// Figure out which block (then or else) is executed.
const Stmt *Executed = S.getThen();
const Stmt *Skipped = S.getElse();
if (!CondConstant) // Condition false?
std::swap(Executed, Skipped);
// If the skipped block has no labels in it, just emit the executed block.
// This avoids emitting dead code and simplifies the CFG substantially.
if (S.isConstexpr() || !ContainsLabel(Skipped)) {
if (CondConstant)
incrementProfileCounter(&S);
if (Executed) {
RunCleanupsScope ExecutedScope(*this);
EmitStmt(Executed);
}
return;
}
}
// Otherwise, the condition did not fold, or we couldn't elide it. Just emit
// the conditional branch.
llvm::BasicBlock *ThenBlock = createBasicBlock("if.then");
llvm::BasicBlock *ContBlock = createBasicBlock("if.end");
llvm::BasicBlock *ElseBlock = ContBlock;
if (S.getElse())
ElseBlock = createBasicBlock("if.else");
// Prefer the PGO based weights over the likelihood attribute.
// When the build isn't optimized the metadata isn't used, so don't generate
// it.
// Also, differentiate between disabled PGO and a never executed branch with
// PGO. Assuming PGO is in use:
// - we want to ignore the [[likely]] attribute if the branch is never
// executed,
// - assuming the profile is poor, preserving the attribute may still be
// beneficial.
// As an approximation, preserve the attribute only if both the branch and the
// parent context were not executed.
Stmt::Likelihood LH = Stmt::LH_None;
uint64_t ThenCount = getProfileCount(S.getThen());
if (!ThenCount && !getCurrentProfileCount() &&
CGM.getCodeGenOpts().OptimizationLevel)
LH = Stmt::getLikelihood(S.getThen(), S.getElse());
// When measuring MC/DC, always fully evaluate the condition up front using
// EvaluateExprAsBool() so that the test vector bitmap can be updated prior to
// executing the body of the if.then or if.else. This is useful for when
// there is a 'return' within the body, but this is particularly beneficial
// when one if-stmt is nested within another if-stmt so that all of the MC/DC
// updates are kept linear and consistent.
if (!CGM.getCodeGenOpts().MCDCCoverage)
EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, ThenCount, LH);
else {
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
Builder.CreateCondBr(BoolCondVal, ThenBlock, ElseBlock);
}
// Emit the 'then' code.
EmitBlock(ThenBlock);
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(S.getThen());
else
incrementProfileCounter(&S);
{
RunCleanupsScope ThenScope(*this);
EmitStmt(S.getThen());
}
EmitBranch(ContBlock);
// Emit the 'else' code if present.
if (const Stmt *Else = S.getElse()) {
{
// There is no need to emit line number for an unconditional branch.
auto NL = ApplyDebugLocation::CreateEmpty(*this);
EmitBlock(ElseBlock);
}
// When single byte coverage mode is enabled, add a counter to else block.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(Else);
{
RunCleanupsScope ElseScope(*this);
EmitStmt(Else);
}
{
// There is no need to emit line number for an unconditional branch.
auto NL = ApplyDebugLocation::CreateEmpty(*this);
EmitBranch(ContBlock);
}
}
// Emit the continuation block for code after the if.
EmitBlock(ContBlock, true);
// When single byte coverage mode is enabled, add a counter to continuation
// block.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(&S);
}
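// Illustrative example (editorial) of the constant-folding fast path above:
// when the condition folds and the dead arm contains no label, only the
// live arm is emitted and no conditional branch is created.
//
//   if (sizeof(void *) == 8)
//     use_64bit();          // emitted directly
//   else
//     use_32bit();          // skipped entirely (unless it contains a label)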
bool CodeGenFunction::checkIfLoopMustProgress(const Expr *ControllingExpression,
bool HasEmptyBody) {
if (CGM.getCodeGenOpts().getFiniteLoops() ==
CodeGenOptions::FiniteLoopsKind::Never)
return false;
// Now apply rules for plain C (see 6.8.5.6 in C11).
// Loops with constant conditions do not have to make progress in any C
// version.
// As an extension, we treat conditions that can be constant-folded to an
// integer as constant conditions.
Expr::EvalResult Result;
bool CondIsConstInt =
!ControllingExpression ||
(ControllingExpression->EvaluateAsInt(Result, getContext()) &&
Result.Val.isInt());
bool CondIsTrue = CondIsConstInt && (!ControllingExpression ||
Result.Val.getInt().getBoolValue());
// Loops with non-constant conditions must make progress in C11 and later.
if (getLangOpts().C11 && !CondIsConstInt)
return true;
// [C++26][intro.progress] (DR)
// The implementation may assume that any thread will eventually do one of the
// following:
// [...]
// - continue execution of a trivial infinite loop ([stmt.iter.general]).
if (CGM.getCodeGenOpts().getFiniteLoops() ==
CodeGenOptions::FiniteLoopsKind::Always ||
getLangOpts().CPlusPlus11) {
if (HasEmptyBody && CondIsTrue) {
CurFn->removeFnAttr(llvm::Attribute::MustProgress);
return false;
}
return true;
}
return false;
}
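// Illustrative inputs (editorial; assuming the default finite-loops
// handling) for the rules above:
//
//   void spin(void) { while (1) ; }              // trivial infinite loop:
//                                                // mustprogress is dropped
//   void poll(int (*f)(void)) { while (f()) ; }  // non-constant condition:
//                                                // must progress in C11+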
// [C++26][stmt.iter.general] (DR)
// A trivially empty iteration statement is an iteration statement matching one
// of the following forms:
// - while ( expression ) ;
// - while ( expression ) { }
// - do ; while ( expression ) ;
// - do { } while ( expression ) ;
// - for ( init-statement expression(opt); ) ;
// - for ( init-statement expression(opt); ) { }
template <typename LoopStmt> static bool hasEmptyLoopBody(const LoopStmt &S) {
if constexpr (std::is_same_v<LoopStmt, ForStmt>) {
if (S.getInc())
return false;
}
const Stmt *Body = S.getBody();
if (!Body || isa<NullStmt>(Body))
return true;
if (const CompoundStmt *Compound = dyn_cast<CompoundStmt>(Body))
return Compound->body_empty();
return false;
}
void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
ArrayRef<const Attr *> WhileAttrs) {
// Emit the header for the loop, which will also become
// the continue target.
JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
EmitBlock(LoopHeader.getBlock());
if (CGM.shouldEmitConvergenceTokens())
ConvergenceTokenStack.push_back(emitConvergenceLoopToken(
LoopHeader.getBlock(), ConvergenceTokenStack.back()));
// Create an exit block for when the condition fails, which will
// also become the break target.
JumpDest LoopExit = getJumpDestInCurrentScope("while.end");
// Store the blocks to use for break and continue.
BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader));
// C++ [stmt.while]p2:
// When the condition of a while statement is a declaration, the
// scope of the variable that is declared extends from its point
// of declaration (3.3.2) to the end of the while statement.
// [...]
// The object created in a condition is destroyed and created
// with each iteration of the loop.
RunCleanupsScope ConditionScope(*this);
if (S.getConditionVariable())
EmitDecl(*S.getConditionVariable());
// Evaluate the conditional in the while header. C99 6.8.5.1: The
// evaluation of the controlling expression takes place before each
// execution of the loop body.
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
// while(1) is common; avoid extra exit blocks. Be sure
// to correctly handle break/continue though.
llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal);
bool EmitBoolCondBranch = !C || !C->isOne();
const SourceRange &R = S.getSourceRange();
LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), CGM.getCodeGenOpts(),
WhileAttrs, SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()),
checkIfLoopMustProgress(S.getCond(), hasEmptyLoopBody(S)));
// When single byte coverage mode is enabled, add a counter to loop condition.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(S.getCond());
// As long as the condition is true, go to the loop body.
llvm::BasicBlock *LoopBody = createBasicBlock("while.body");
if (EmitBoolCondBranch) {
llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
if (ConditionScope.requiresCleanups())
ExitBlock = createBasicBlock("while.exit");
llvm::MDNode *Weights =
createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody()));
if (!Weights && CGM.getCodeGenOpts().OptimizationLevel)
BoolCondVal = emitCondLikelihoodViaExpectIntrinsic(
BoolCondVal, Stmt::getLikelihood(S.getBody()));
Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock, Weights);
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
EmitBranchThroughCleanup(LoopExit);
}
} else if (const Attr *A = Stmt::getLikelihoodAttr(S.getBody())) {
CGM.getDiags().Report(A->getLocation(),
diag::warn_attribute_has_no_effect_on_infinite_loop)
<< A << A->getRange();
CGM.getDiags().Report(
S.getWhileLoc(),
diag::note_attribute_has_no_effect_on_infinite_loop_here)
<< SourceRange(S.getWhileLoc(), S.getRParenLoc());
}
// Emit the loop body. We have to emit this in a cleanup scope
// because it might be a singleton DeclStmt.
{
RunCleanupsScope BodyScope(*this);
EmitBlock(LoopBody);
// When single byte coverage mode is enabled, add a counter to the body.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(S.getBody());
else
incrementProfileCounter(&S);
EmitStmt(S.getBody());
}
BreakContinueStack.pop_back();
// Immediately force cleanup.
ConditionScope.ForceCleanup();
EmitStopPoint(&S);
// Branch to the loop header again.
EmitBranch(LoopHeader.getBlock());
LoopStack.pop();
// Emit the exit block.
EmitBlock(LoopExit.getBlock(), true);
// If we skipped emitting the condition branch, the LoopHeader block is
// typically just a forwarding branch; try to erase it.
if (!EmitBoolCondBranch)
SimplifyForwardingBlocks(LoopHeader.getBlock());
// When single byte coverage mode is enabled, add a counter to continuation
// block.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(&S);
if (CGM.shouldEmitConvergenceTokens())
ConvergenceTokenStack.pop_back();
}
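// Illustrative sketch (editorial) of the block structure emitted above;
// names match the createBasicBlock calls:
//
//   while.cond:  evaluate condition
//                br i1 %cond, label %while.body, label %while.end
//   while.body:  body statements; br label %while.cond
//   while.end:   continuation ("break" also branches here through cleanups)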
void CodeGenFunction::EmitDoStmt(const DoStmt &S,
ArrayRef<const Attr *> DoAttrs) {
JumpDest LoopExit = getJumpDestInCurrentScope("do.end");
JumpDest LoopCond = getJumpDestInCurrentScope("do.cond");
uint64_t ParentCount = getCurrentProfileCount();
// Store the blocks to use for break and continue.
BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond));
// Emit the body of the loop.
llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
if (llvm::EnableSingleByteCoverage)
EmitBlockWithFallThrough(LoopBody, S.getBody());
else
EmitBlockWithFallThrough(LoopBody, &S);
if (CGM.shouldEmitConvergenceTokens())
ConvergenceTokenStack.push_back(
emitConvergenceLoopToken(LoopBody, ConvergenceTokenStack.back()));
{
RunCleanupsScope BodyScope(*this);
EmitStmt(S.getBody());
}
EmitBlock(LoopCond.getBlock());
// When single byte coverage mode is enabled, add a counter to loop condition.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(S.getCond());
// C99 6.8.5.2: "The evaluation of the controlling expression takes place
// after each execution of the loop body."
// Evaluate the conditional in the while header.
// C99 6.8.5p2/p4: The first substatement is executed if the expression
// compares unequal to 0. The condition must be a scalar type.
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
BreakContinueStack.pop_back();
// "do {} while (0)" is common in macros, avoid extra blocks. Be sure
// to correctly handle break/continue though.
llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal);
bool EmitBoolCondBranch = !C || !C->isZero();
const SourceRange &R = S.getSourceRange();
LoopStack.push(LoopBody, CGM.getContext(), CGM.getCodeGenOpts(), DoAttrs,
SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()),
checkIfLoopMustProgress(S.getCond(), hasEmptyLoopBody(S)));
// As long as the condition is true, iterate the loop.
if (EmitBoolCondBranch) {
uint64_t BackedgeCount = getProfileCount(S.getBody()) - ParentCount;
Builder.CreateCondBr(
BoolCondVal, LoopBody, LoopExit.getBlock(),
createProfileWeightsForLoop(S.getCond(), BackedgeCount));
}
LoopStack.pop();
// Emit the exit block.
EmitBlock(LoopExit.getBlock());
// If we skipped emitting the condition branch, the do.cond block is
// typically just a forwarding branch; try to erase it.
if (!EmitBoolCondBranch)
SimplifyForwardingBlocks(LoopCond.getBlock());
// When single byte coverage mode is enabled, add a counter to continuation
// block.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(&S);
if (CGM.shouldEmitConvergenceTokens())
ConvergenceTokenStack.pop_back();
}
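// Illustrative note (editorial) on the constant-false fast path above:
// because no back-edge branch is emitted and the do.cond forwarding block
// is simplified away, the common macro idiom lowers to straight-line code.
//
//   #define SWAP(a, b) do { int t = (a); (a) = (b); (b) = t; } while (0)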
void CodeGenFunction::EmitForStmt(const ForStmt &S,
ArrayRef<const Attr *> ForAttrs) {
JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
LexicalScope ForScope(*this, S.getSourceRange());
// Evaluate the first part before the loop.
if (S.getInit())
EmitStmt(S.getInit());
// Start the loop with a block that tests the condition.
// If there's an increment, the continue scope will be overwritten
// later.
JumpDest CondDest = getJumpDestInCurrentScope("for.cond");
llvm::BasicBlock *CondBlock = CondDest.getBlock();
EmitBlock(CondBlock);
if (CGM.shouldEmitConvergenceTokens())
ConvergenceTokenStack.push_back(
emitConvergenceLoopToken(CondBlock, ConvergenceTokenStack.back()));
const SourceRange &R = S.getSourceRange();
LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs,
SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()),
checkIfLoopMustProgress(S.getCond(), hasEmptyLoopBody(S)));
// Create a cleanup scope for the condition variable cleanups.
LexicalScope ConditionScope(*this, S.getSourceRange());
// If the for loop doesn't have an increment we can just use the condition as
// the continue block. Otherwise, if there is no condition variable, we can
// form the continue block now. If there is a condition variable, we can't
// form the continue block until after we've emitted the condition, because
// the condition is in scope in the increment, but Sema's jump diagnostics
// ensure that there are no continues from the condition variable that jump
// to the loop increment.
JumpDest Continue;
if (!S.getInc())
Continue = CondDest;
else if (!S.getConditionVariable())
Continue = getJumpDestInCurrentScope("for.inc");
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
if (S.getCond()) {
// If the for statement has a condition scope, emit the local variable
// declaration.
if (S.getConditionVariable()) {
EmitDecl(*S.getConditionVariable());
// We have entered the condition variable's scope, so we're now able to
// jump to the continue block.
Continue = S.getInc() ? getJumpDestInCurrentScope("for.inc") : CondDest;
BreakContinueStack.back().ContinueBlock = Continue;
}
// When single byte coverage mode is enabled, add a counter to loop
// condition.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(S.getCond());
llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
if (ForScope.requiresCleanups())
ExitBlock = createBasicBlock("for.cond.cleanup");
// As long as the condition is true, iterate the loop.
llvm::BasicBlock *ForBody = createBasicBlock("for.body");
// C99 6.8.5p2/p4: The first substatement is executed if the expression
// compares unequal to 0. The condition must be a scalar type.
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
llvm::MDNode *Weights =
createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody()));
if (!Weights && CGM.getCodeGenOpts().OptimizationLevel)
BoolCondVal = emitCondLikelihoodViaExpectIntrinsic(
BoolCondVal, Stmt::getLikelihood(S.getBody()));
Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, Weights);
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
EmitBranchThroughCleanup(LoopExit);
}
EmitBlock(ForBody);
} else {
// Treat it as a non-zero constant. Don't even create a new block for the
// body, just fall into it.
}
// When single byte coverage mode is enabled, add a counter to the body.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(S.getBody());
else
incrementProfileCounter(&S);
{
// Create a separate cleanup scope for the body, in case it is not
// a compound statement.
RunCleanupsScope BodyScope(*this);
EmitStmt(S.getBody());
}
// If there is an increment, emit it next.
if (S.getInc()) {
EmitBlock(Continue.getBlock());
EmitStmt(S.getInc());
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(S.getInc());
}
BreakContinueStack.pop_back();
ConditionScope.ForceCleanup();
EmitStopPoint(&S);
EmitBranch(CondBlock);
ForScope.ForceCleanup();
LoopStack.pop();
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock(), true);
// When single byte coverage mode is enabled, add a counter to continuation
// block.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(&S);
if (CGM.shouldEmitConvergenceTokens())
ConvergenceTokenStack.pop_back();
}
void
CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S,
ArrayRef<const Attr *> ForAttrs) {
JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
LexicalScope ForScope(*this, S.getSourceRange());
// Evaluate the first pieces before the loop.
if (S.getInit())
EmitStmt(S.getInit());
EmitStmt(S.getRangeStmt());
EmitStmt(S.getBeginStmt());
EmitStmt(S.getEndStmt());
// Start the loop with a block that tests the condition.
// If there's an increment, the continue scope will be overwritten
// later.
llvm::BasicBlock *CondBlock = createBasicBlock("for.cond");
EmitBlock(CondBlock);
if (CGM.shouldEmitConvergenceTokens())
ConvergenceTokenStack.push_back(
emitConvergenceLoopToken(CondBlock, ConvergenceTokenStack.back()));
const SourceRange &R = S.getSourceRange();
LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs,
SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()));
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
if (ForScope.requiresCleanups())
ExitBlock = createBasicBlock("for.cond.cleanup");
// The loop body, consisting of the specified body and the loop variable.
llvm::BasicBlock *ForBody = createBasicBlock("for.body");
// The body is executed if the expression, contextually converted
// to bool, is true.
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
llvm::MDNode *Weights =
createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody()));
if (!Weights && CGM.getCodeGenOpts().OptimizationLevel)
BoolCondVal = emitCondLikelihoodViaExpectIntrinsic(
BoolCondVal, Stmt::getLikelihood(S.getBody()));
Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, Weights);
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
EmitBranchThroughCleanup(LoopExit);
}
EmitBlock(ForBody);
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(S.getBody());
else
incrementProfileCounter(&S);
// Create a block for the increment. In case of a 'continue', we jump there.
JumpDest Continue = getJumpDestInCurrentScope("for.inc");
// Store the blocks to use for break and continue.
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
{
// Create a separate cleanup scope for the loop variable and body.
LexicalScope BodyScope(*this, S.getSourceRange());
EmitStmt(S.getLoopVarStmt());
EmitStmt(S.getBody());
}
EmitStopPoint(&S);
// If there is an increment, emit it next.
EmitBlock(Continue.getBlock());
EmitStmt(S.getInc());
BreakContinueStack.pop_back();
EmitBranch(CondBlock);
ForScope.ForceCleanup();
LoopStack.pop();
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock(), true);
// When single byte coverage mode is enabled, add a counter to continuation
// block.
if (llvm::EnableSingleByteCoverage)
incrementProfileCounter(&S);
if (CGM.shouldEmitConvergenceTokens())
ConvergenceTokenStack.pop_back();
}
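// Illustrative reminder (editorial; names are exposition-only, per
// [stmt.ranged]) of the desugaring whose pieces are emitted above:
//
//   for (auto &x : r) body;
//   // behaves as:
//   auto &&__range = r;
//   auto __begin = begin-expr, __end = end-expr;
//   for (; __begin != __end; ++__begin) { auto &x = *__begin; body; }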
void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
if (RV.isScalar()) {
Builder.CreateStore(RV.getScalarVal(), ReturnValue);
} else if (RV.isAggregate()) {
LValue Dest = MakeAddrLValue(ReturnValue, Ty);
LValue Src = MakeAddrLValue(RV.getAggregateAddress(), Ty);
EmitAggregateCopy(Dest, Src, Ty, getOverlapForReturnValue());
} else {
EmitStoreOfComplex(RV.getComplexVal(), MakeAddrLValue(ReturnValue, Ty),
/*init*/ true);
}
EmitBranchThroughCleanup(ReturnBlock);
}
namespace {
// RAII struct used to save and restore a return statement's result expression.
struct SaveRetExprRAII {
SaveRetExprRAII(const Expr *RetExpr, CodeGenFunction &CGF)
: OldRetExpr(CGF.RetExpr), CGF(CGF) {
CGF.RetExpr = RetExpr;
}
~SaveRetExprRAII() { CGF.RetExpr = OldRetExpr; }
const Expr *OldRetExpr;
CodeGenFunction &CGF;
};
} // namespace
/// Determine if the given call uses the swiftasync calling convention.
static bool isSwiftAsyncCallee(const CallExpr *CE) {
auto calleeQualType = CE->getCallee()->getType();
const FunctionType *calleeType = nullptr;
if (calleeQualType->isFunctionPointerType() ||
calleeQualType->isFunctionReferenceType() ||
calleeQualType->isBlockPointerType() ||
calleeQualType->isMemberFunctionPointerType()) {
calleeType = calleeQualType->getPointeeType()->castAs<FunctionType>();
} else if (auto *ty = dyn_cast<FunctionType>(calleeQualType)) {
calleeType = ty;
} else if (auto CMCE = dyn_cast<CXXMemberCallExpr>(CE)) {
if (auto methodDecl = CMCE->getMethodDecl()) {
// getMethodDecl() doesn't handle member pointers at the moment.
calleeType = methodDecl->getType()->castAs<FunctionType>();
} else {
return false;
}
} else {
return false;
}
return calleeType->getCallConv() == CallingConv::CC_SwiftAsync;
}
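// Illustrative example (editorial; hypothetical C++ input using the
// swiftasynccall convention) for the check above: a swiftasynccall function
// returning a direct call to another swiftasynccall function gets that call
// marked musttail in EmitReturnStmt below.
//
//   __attribute__((swiftasynccall)) void step2(void *ctx);
//   __attribute__((swiftasynccall)) void step1(void *ctx) {
//     return step2(ctx);   // lowered as a musttail call
//   }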
/// EmitReturnStmt - Note that due to GCC extensions, this can have an operand
/// if the function returns void, or may be missing one if the function returns
/// non-void. Fun stuff :).
void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
if (requiresReturnValueCheck()) {
llvm::Constant *SLoc = EmitCheckSourceLocation(S.getBeginLoc());
auto *SLocPtr =
new llvm::GlobalVariable(CGM.getModule(), SLoc->getType(), false,
llvm::GlobalVariable::PrivateLinkage, SLoc);
SLocPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CGM.getSanitizerMetadata()->disableSanitizerForGlobal(SLocPtr);
assert(ReturnLocation.isValid() && "No valid return location");
Builder.CreateStore(SLocPtr, ReturnLocation);
}
// Returning from an outlined SEH helper is UB, and we already warn on it.
if (IsOutlinedSEHHelper) {
Builder.CreateUnreachable();
Builder.ClearInsertionPoint();
}
// Emit the result value, even if unused, to evaluate the side effects.
const Expr *RV = S.getRetValue();
// Record the result expression of the return statement. The recorded
// expression is used to determine whether a block capture's lifetime should
// end at the end of the full expression as opposed to the end of the scope
// enclosing the block expression.
//
// This permits a small, easily-implemented exception to our over-conservative
// rules about not jumping to statements following block literals with
// non-trivial cleanups.
SaveRetExprRAII SaveRetExpr(RV, *this);
RunCleanupsScope cleanupScope(*this);
if (const auto *EWC = dyn_cast_or_null<ExprWithCleanups>(RV))
RV = EWC->getSubExpr();
// If we're in a swiftasynccall function, and the return expression is a
// call to a swiftasynccall function, mark the call as the musttail call.
std::optional<llvm::SaveAndRestore<const CallExpr *>> SaveMustTail;
if (RV && CurFnInfo &&
CurFnInfo->getASTCallingConvention() == CallingConv::CC_SwiftAsync) {
if (auto CE = dyn_cast<CallExpr>(RV)) {
if (isSwiftAsyncCallee(CE)) {
SaveMustTail.emplace(MustTailCall, CE);
}
}
}
// FIXME: Clean this up by using an LValue for ReturnTemp,
// EmitStoreThroughLValue, and EmitAnyExpr.
// Check if the NRVO candidate was not globalized in OpenMP mode.
if (getLangOpts().ElideConstructors && S.getNRVOCandidate() &&
S.getNRVOCandidate()->isNRVOVariable() &&
(!getLangOpts().OpenMP ||
!CGM.getOpenMPRuntime()
.getAddressOfLocalVariable(*this, S.getNRVOCandidate())
.isValid())) {
// Apply the named return value optimization for this return statement,
// which means doing nothing: the appropriate result has already been
// constructed into the NRVO variable.
// If there is an NRVO flag for this variable, set it to 1 to indicate
// that the cleanup code should not destroy the variable.
if (llvm::Value *NRVOFlag = NRVOFlags[S.getNRVOCandidate()])
Builder.CreateFlagStore(Builder.getTrue(), NRVOFlag);
} else if (!ReturnValue.isValid() || (RV && RV->getType()->isVoidType())) {
// Make sure not to return anything, but evaluate the expression
// for side effects.
if (RV) {
EmitAnyExpr(RV);
}
} else if (!RV) {
// Do nothing (return value is left uninitialized)
} else if (FnRetTy->isReferenceType()) {
// If this function returns a reference, take the address of the expression
// rather than the value.
RValue Result = EmitReferenceBindingToExpr(RV);
Builder.CreateStore(Result.getScalarVal(), ReturnValue);
} else {
switch (getEvaluationKind(RV->getType())) {
case TEK_Scalar: {
llvm::Value *Ret = EmitScalarExpr(RV);
if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect)
EmitStoreOfScalar(Ret, MakeAddrLValue(ReturnValue, RV->getType()),
/*isInit*/ true);
else
Builder.CreateStore(Ret, ReturnValue);
break;
}
case TEK_Complex:
EmitComplexExprIntoLValue(RV, MakeAddrLValue(ReturnValue, RV->getType()),
/*isInit*/ true);
break;
case TEK_Aggregate:
EmitAggExpr(RV, AggValueSlot::forAddr(
ReturnValue, Qualifiers(),
AggValueSlot::IsDestructed,
AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased,
getOverlapForReturnValue()));
break;
}
}
++NumReturnExprs;
if (!RV || RV->isEvaluatable(getContext()))
++NumSimpleReturnExprs;
cleanupScope.ForceCleanup();
EmitBranchThroughCleanup(ReturnBlock);
}
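// Illustrative example (editorial) of the NRVO fast path above: the
// candidate was already constructed in the return slot, so the return emits
// no copy, only the optional NRVO-flag store that disables the variable's
// destructor cleanup.
//
//   std::string make() {
//     std::string s("x");   // constructed directly into ReturnValue
//     return s;             // NRVO: nothing to do here
//   }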
void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) {
// As long as debug info is modeled with instructions, we have to ensure we
// have a place to insert here and write the stop point here.
if (HaveInsertPoint())
EmitStopPoint(&S);
for (const auto *I : S.decls())
EmitDecl(*I);
}
void CodeGenFunction::EmitBreakStmt(const BreakStmt &S) {
assert(!BreakContinueStack.empty() && "break stmt not in a loop or switch!");
// If this code is reachable then emit a stop point (if generating
// debug info). We have to do this ourselves because we are on the
// "simple" statement path.
if (HaveInsertPoint())
EmitStopPoint(&S);
EmitBranchThroughCleanup(BreakContinueStack.back().BreakBlock);
}
void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) {
assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
// If this code is reachable then emit a stop point (if generating
// debug info). We have to do this ourselves because we are on the
// "simple" statement path.
if (HaveInsertPoint())
EmitStopPoint(&S);
EmitBranchThroughCleanup(BreakContinueStack.back().ContinueBlock);
}
/// EmitCaseStmtRange - If case statement range is not too big then
/// add multiple cases to switch instruction, one for each value within
/// the range. If range is too big then emit "if" condition check.
void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S,
ArrayRef<const Attr *> Attrs) {
assert(S.getRHS() && "Expected RHS value in CaseStmt");
llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext());
llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext());
// Emit the code for this case. We do this first to make sure it is
// properly chained from our predecessor before generating the
// switch machinery to enter this block.
llvm::BasicBlock *CaseDest = createBasicBlock("sw.bb");
EmitBlockWithFallThrough(CaseDest, &S);
EmitStmt(S.getSubStmt());
// If range is empty, do nothing.
if (LHS.isSigned() ? RHS.slt(LHS) : RHS.ult(LHS))
return;
Stmt::Likelihood LH = Stmt::getLikelihood(Attrs);
llvm::APInt Range = RHS - LHS;
// FIXME: parameters such as this should not be hardcoded.
if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
// Range is small enough to add multiple switch instruction cases.
uint64_t Total = getProfileCount(&S);
unsigned NCases = Range.getZExtValue() + 1;
// We only have one region counter for the entire set of cases here, so we
// need to divide the weights evenly between the generated cases, ensuring
// that the total weight is preserved. E.g., a weight of 5 over three cases
// will be distributed as weights of 2, 2, and 1.
uint64_t Weight = Total / NCases, Rem = Total % NCases;
for (unsigned I = 0; I != NCases; ++I) {
if (SwitchWeights)
SwitchWeights->push_back(Weight + (Rem ? 1 : 0));
else if (SwitchLikelihood)
SwitchLikelihood->push_back(LH);
if (Rem)
Rem--;
SwitchInsn->addCase(Builder.getInt(LHS), CaseDest);
++LHS;
}
return;
}
// The range is too big. Emit "if" condition into a new block,
// making sure to save and restore the current insertion point.
llvm::BasicBlock *RestoreBB = Builder.GetInsertBlock();
// Push this test onto the chain of range checks (which terminates
// in the default basic block). The switch's default will be changed
// to the top of this chain after switch emission is complete.
llvm::BasicBlock *FalseDest = CaseRangeBlock;
CaseRangeBlock = createBasicBlock("sw.caserange");
CurFn->insert(CurFn->end(), CaseRangeBlock);
Builder.SetInsertPoint(CaseRangeBlock);
// Emit range check.
llvm::Value *Diff =
Builder.CreateSub(SwitchInsn->getCondition(), Builder.getInt(LHS));
llvm::Value *Cond =
Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds");
llvm::MDNode *Weights = nullptr;
if (SwitchWeights) {
uint64_t ThisCount = getProfileCount(&S);
uint64_t DefaultCount = (*SwitchWeights)[0];
Weights = createProfileWeights(ThisCount, DefaultCount);
// Since we're chaining the switch default through each large case range, we
// need to update the weight for the default, i.e., the first case, to include
// this case.
(*SwitchWeights)[0] += ThisCount;
} else if (SwitchLikelihood)
Cond = emitCondLikelihoodViaExpectIntrinsic(Cond, LH);
Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights);
// Restore the appropriate insertion point.
if (RestoreBB)
Builder.SetInsertPoint(RestoreBB);
else
Builder.ClearInsertionPoint();
}
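// Illustrative example (editorial) of the GNU case-range extension handled
// above: small ranges (up to 64 values) expand to one switch case per
// value, while large ranges become an unsigned subtract-and-compare check
// chained off the switch default.
//
//   switch (c) {
//   case 'a' ... 'z': return 1;        // 26 individual switch cases
//   case 1000 ... 2000000: return 2;   // if ((c - 1000) <= 1999000u) ...
//   default: return 0;
//   }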
void CodeGenFunction::EmitCaseStmt(const CaseStmt &S,
ArrayRef<const Attr *> Attrs) {
// If there is no enclosing switch instance that we're aware of, then this
// case statement and its block can be elided. This situation only happens
// when we've constant-folded the switch, are emitting the constant case,
// and part of the constant case includes another case statement. For
// instance: switch (4) { case 4: do { case 5: } while (1); }
if (!SwitchInsn) {
EmitStmt(S.getSubStmt());
return;
}
// Handle case ranges.
if (S.getRHS()) {
EmitCaseStmtRange(S, Attrs);
return;
}
llvm::ConstantInt *CaseVal =
Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
// Emit debuginfo for the case value if it is an enum value.
const ConstantExpr *CE;
if (auto ICE = dyn_cast<ImplicitCastExpr>(S.getLHS()))
CE = dyn_cast<ConstantExpr>(ICE->getSubExpr());
else
CE = dyn_cast<ConstantExpr>(S.getLHS());
if (CE) {
if (auto DE = dyn_cast<DeclRefExpr>(CE->getSubExpr()))
if (CGDebugInfo *Dbg = getDebugInfo())
if (CGM.getCodeGenOpts().hasReducedDebugInfo())
Dbg->EmitGlobalVariable(DE->getDecl(),
APValue(llvm::APSInt(CaseVal->getValue())));
}
if (SwitchLikelihood)
SwitchLikelihood->push_back(Stmt::getLikelihood(Attrs));
// If the body of the case is just a 'break', try to not emit an empty block.
// If we're profiling or we're not optimizing, leave the block in for better
// debug and coverage analysis.
if (!CGM.getCodeGenOpts().hasProfileClangInstr() &&
CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isa<BreakStmt>(S.getSubStmt())) {
JumpDest Block = BreakContinueStack.back().BreakBlock;
// Only do this optimization if there are no cleanups that need emitting.
if (isObviouslyBranchWithoutCleanups(Block)) {
if (SwitchWeights)
SwitchWeights->push_back(getProfileCount(&S));
SwitchInsn->addCase(CaseVal, Block.getBlock());
// If there was a fallthrough into this case, make sure to redirect it to
// the end of the switch as well.
if (Builder.GetInsertBlock()) {
Builder.CreateBr(Block.getBlock());
Builder.ClearInsertionPoint();
}
return;
}
}
llvm::BasicBlock *CaseDest = createBasicBlock("sw.bb");
EmitBlockWithFallThrough(CaseDest, &S);
if (SwitchWeights)
SwitchWeights->push_back(getProfileCount(&S));
SwitchInsn->addCase(CaseVal, CaseDest);
// Recursively emitting the statement is acceptable, but is not wonderful for
// code where we have many case statements nested together, i.e.:
// case 1:
// case 2:
// case 3: etc.
// Handling this recursively will create a new block for each case statement
// that falls through to the next case which is IR intensive. It also causes
// deep recursion which can run into stack depth limitations. Handle
// sequential non-range case statements specially.
//
// TODO: When the next case has a likelihood attribute, the code returns to
// the recursive algorithm. Maybe improve this case if it becomes common
// practice to use a lot of attributes.
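// For illustration (hypothetical source): given
//   switch (x) { case 1: case 2: case 3: foo(); break; }
// the value 1 was added above, and the loop below adds 2 and 3 to the same
// shared "sw.bb" destination instead of chaining three fallthrough blocks.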
const CaseStmt *CurCase = &S;
const CaseStmt *NextCase = dyn_cast<CaseStmt>(S.getSubStmt());
// Otherwise, iteratively add consecutive cases to this switch stmt.
while (NextCase && NextCase->getRHS() == nullptr) {
CurCase = NextCase;
llvm::ConstantInt *CaseVal =
Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext()));
if (SwitchWeights)
SwitchWeights->push_back(getProfileCount(NextCase));
if (CGM.getCodeGenOpts().hasProfileClangInstr()) {
CaseDest = createBasicBlock("sw.bb");
EmitBlockWithFallThrough(CaseDest, CurCase);
}
// Since this loop is only executed when the CaseStmt has no attributes
// use a hard-coded value.
if (SwitchLikelihood)
SwitchLikelihood->push_back(Stmt::LH_None);
SwitchInsn->addCase(CaseVal, CaseDest);
NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt());
}
// Generate a stop point for debug info if the case statement is
// followed by a default statement. A fallthrough case before a
// default case gets its own branch target.
if (CurCase->getSubStmt()->getStmtClass() == Stmt::DefaultStmtClass)
EmitStopPoint(CurCase);
// Normal default recursion for non-cases.
EmitStmt(CurCase->getSubStmt());
}
void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S,
ArrayRef<const Attr *> Attrs) {
// If there is no enclosing switch instance that we're aware of, then this
// default statement can be elided. This situation only happens when we've
// constant-folded the switch.
if (!SwitchInsn) {
EmitStmt(S.getSubStmt());
return;
}
llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest();
assert(DefaultBlock->empty() &&
"EmitDefaultStmt: Default block already defined?");
if (SwitchLikelihood)
SwitchLikelihood->front() = Stmt::getLikelihood(Attrs);
EmitBlockWithFallThrough(DefaultBlock, &S);
EmitStmt(S.getSubStmt());
}
/// CollectStatementsForCase - Given the body of a 'switch' statement and a
/// constant value that is being switched on, see if we can dead code eliminate
/// the body of the switch to a simple series of statements to emit. Basically,
/// on a switch (5) we want to find these statements:
/// case 5:
/// printf(...); <--
/// ++i; <--
/// break;
///
/// and add them to the ResultStmts vector. If it is unsafe to do this
/// transformation (for example, one of the elided statements contains a label
/// that might be jumped to), return CSFC_Failure. If we handled it and 'S'
/// should include statements after it (e.g. the printf() line is a substmt of
/// the case) then return CSFC_FallThrough. If we handled it and found a break
/// statement, then return CSFC_Success.
///
/// If Case is non-null, then we are looking for the specified case, checking
/// that nothing we jump over contains labels. If Case is null, then we found
/// the case and are looking for the break.
///
/// If the recursive walk actually finds our Case, then we set FoundCase to
/// true.
///
enum CSFC_Result { CSFC_Failure, CSFC_FallThrough, CSFC_Success };
static CSFC_Result CollectStatementsForCase(const Stmt *S,
const SwitchCase *Case,
bool &FoundCase,
SmallVectorImpl<const Stmt*> &ResultStmts) {
// If this is a null statement, just succeed.
if (!S)
return Case ? CSFC_Success : CSFC_FallThrough;
// If this is the switchcase (case 4: or default) that we're looking for, then
// we're in business. Just add the substatement.
if (const SwitchCase *SC = dyn_cast<SwitchCase>(S)) {
if (S == Case) {
FoundCase = true;
return CollectStatementsForCase(SC->getSubStmt(), nullptr, FoundCase,
ResultStmts);
}
// Otherwise, this is some other case or default statement, just ignore it.
return CollectStatementsForCase(SC->getSubStmt(), Case, FoundCase,
ResultStmts);
}
// If we are in the live part of the code and we found our break statement,
// return a success!
if (!Case && isa<BreakStmt>(S))
return CSFC_Success;
// If this is a switch statement, then it might contain the SwitchCase, the
// break, or neither.
if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(S)) {
// Handle this as two cases: we might be looking for the SwitchCase (if so
// the skipped statements must be skippable) or we might already have it.
CompoundStmt::const_body_iterator I = CS->body_begin(), E = CS->body_end();
bool StartedInLiveCode = FoundCase;
unsigned StartSize = ResultStmts.size();
// If we've not found the case yet, scan through looking for it.
if (Case) {
// Keep track of whether we see a skipped declaration. The code could be
// using the declaration even if it is skipped, so we can't optimize out
// the decl if the kept statements might refer to it.
bool HadSkippedDecl = false;
// If we're looking for the case, just see if we can skip each of the
// substatements.
for (; Case && I != E; ++I) {
HadSkippedDecl |= CodeGenFunction::mightAddDeclToScope(*I);
switch (CollectStatementsForCase(*I, Case, FoundCase, ResultStmts)) {
case CSFC_Failure: return CSFC_Failure;
case CSFC_Success:
// A successful result means either 1) that the statement doesn't have the
// case and is skippable, or 2) that it does contain the case value and also
// contains the break to exit the switch. In the latter case, we just verify
// the rest of the statements are elidable.
if (FoundCase) {
// If we found the case and skipped declarations, we can't do the
// optimization.
if (HadSkippedDecl)
return CSFC_Failure;
for (++I; I != E; ++I)
if (CodeGenFunction::ContainsLabel(*I, true))
return CSFC_Failure;
return CSFC_Success;
}
break;
case CSFC_FallThrough:
// If we have a fallthrough condition, then we must have found the
// case started to include statements. Consider the rest of the
// statements in the compound statement as candidates for inclusion.
assert(FoundCase && "Didn't find case but returned fallthrough?");
// We recursively found Case, so we're not looking for it anymore.
Case = nullptr;
// If we found the case and skipped declarations, we can't do the
// optimization.
if (HadSkippedDecl)
return CSFC_Failure;
break;
}
}
if (!FoundCase)
return CSFC_Success;
assert(!HadSkippedDecl && "fallthrough after skipping decl");
}
// If we have statements in our range, then we know that the statements are
// live and need to be added to the set of statements we're tracking.
bool AnyDecls = false;
for (; I != E; ++I) {
AnyDecls |= CodeGenFunction::mightAddDeclToScope(*I);
switch (CollectStatementsForCase(*I, nullptr, FoundCase, ResultStmts)) {
case CSFC_Failure: return CSFC_Failure;
case CSFC_FallThrough:
// A fallthrough result means that the statement was simple and was just
// included in ResultStmts; keep adding statements afterwards.
break;
case CSFC_Success:
// A successful result means that we found the break statement and
// stopped statement inclusion. We just ensure that any leftover stmts
// are skippable and return success ourselves.
for (++I; I != E; ++I)
if (CodeGenFunction::ContainsLabel(*I, true))
return CSFC_Failure;
return CSFC_Success;
}
}
// If we're about to fall out of a scope without hitting a 'break;', we
// can't perform the optimization if there were any decls in that scope
// (we'd lose their end-of-lifetime).
if (AnyDecls) {
// If the entire compound statement was live, there's one more thing we
// can try before giving up: emit the whole thing as a single statement.
// We can do that unless the statement contains a 'break;'.
// FIXME: Such a break must be at the end of a construct within this one.
// We could emit this by just ignoring the BreakStmts entirely.
if (StartedInLiveCode && !CodeGenFunction::containsBreak(S)) {
ResultStmts.resize(StartSize);
ResultStmts.push_back(S);
} else {
return CSFC_Failure;
}
}
return CSFC_FallThrough;
}
// Okay, this is some other statement that we don't handle explicitly, like a
// for statement or increment etc. If we are skipping over this statement,
// just verify it doesn't have labels, which would make it invalid to elide.
if (Case) {
if (CodeGenFunction::ContainsLabel(S, true))
return CSFC_Failure;
return CSFC_Success;
}
// Otherwise, we want to include this statement. Everything is cool with that
// so long as it doesn't contain a break out of the switch we're in.
if (CodeGenFunction::containsBreak(S)) return CSFC_Failure;
// Otherwise, everything is great. Include the statement and tell the caller
// that we fall through and include the next statement as well.
ResultStmts.push_back(S);
return CSFC_FallThrough;
}
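// For illustration, a body this walk must reject (hypothetical source):
//   switch (5) { case 4: lbl: foo(); break; case 5: bar(); break; }
// Eliding the "case 4" arm would drop "lbl:", which a goto elsewhere might
// target, so ContainsLabel forces CSFC_Failure.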
/// FindCaseStatementsForValue - Find the case statement being jumped to and
/// then invoke CollectStatementsForCase to find the list of statements to emit
/// for a switch on constant. See the comment above CollectStatementsForCase
/// for more details.
static bool FindCaseStatementsForValue(const SwitchStmt &S,
const llvm::APSInt &ConstantCondValue,
SmallVectorImpl<const Stmt*> &ResultStmts,
ASTContext &C,
const SwitchCase *&ResultCase) {
// First step, find the switch case that is being branched to. We can do this
// efficiently by scanning the SwitchCase list.
const SwitchCase *Case = S.getSwitchCaseList();
const DefaultStmt *DefaultCase = nullptr;
for (; Case; Case = Case->getNextSwitchCase()) {
// It's either a default or case. Just remember the default statement in
// case we're not jumping to any numbered cases.
if (const DefaultStmt *DS = dyn_cast<DefaultStmt>(Case)) {
DefaultCase = DS;
continue;
}
// Check to see if this case is the one we're looking for.
const CaseStmt *CS = cast<CaseStmt>(Case);
// Don't handle case ranges yet.
if (CS->getRHS()) return false;
// If we found our case, remember it as 'case'.
if (CS->getLHS()->EvaluateKnownConstInt(C) == ConstantCondValue)
break;
}
// If we didn't find a matching case, we use a default if it exists, or we
// elide the whole switch body!
if (!Case) {
// It is safe to elide the body of the switch if it doesn't contain labels
// etc. If it is safe, return successfully with an empty ResultStmts list.
if (!DefaultCase)
return !CodeGenFunction::ContainsLabel(&S);
Case = DefaultCase;
}
// Ok, we know which case is being jumped to, try to collect all the
// statements that follow it. This can fail for a variety of reasons. Also,
// check to see that the recursive walk actually found our case statement.
// Insane cases like this can fail to find it in the recursive walk since we
// don't handle every stmt kind:
// switch (4) {
// while (1) {
// case 4: ...
bool FoundCase = false;
ResultCase = Case;
return CollectStatementsForCase(S.getBody(), Case, FoundCase,
ResultStmts) != CSFC_Failure &&
FoundCase;
}
static std::optional<SmallVector<uint64_t, 16>>
getLikelihoodWeights(ArrayRef<Stmt::Likelihood> Likelihoods) {
// Are there enough branches to weight them?
if (Likelihoods.size() <= 1)
return std::nullopt;
uint64_t NumUnlikely = 0;
uint64_t NumNone = 0;
uint64_t NumLikely = 0;
for (const auto LH : Likelihoods) {
switch (LH) {
case Stmt::LH_Unlikely:
++NumUnlikely;
break;
case Stmt::LH_None:
++NumNone;
break;
case Stmt::LH_Likely:
++NumLikely;
break;
}
}
// Is there a likelihood attribute used?
if (NumUnlikely == 0 && NumLikely == 0)
return std::nullopt;
// When multiple cases share the same code they can be combined during
// optimization. In that case the weights of the branch will be the sum of
// the individual weights. Make sure the combined sum of all neutral cases
// doesn't exceed the value of a single likely attribute.
// The additions both avoid divisions by 0 and make sure the weights of None
// don't exceed the weight of Likely.
const uint64_t Likely = INT32_MAX / (NumLikely + 2);
const uint64_t None = Likely / (NumNone + 1);
const uint64_t Unlikely = 0;
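// For illustration (hypothetical counts): with NumLikely = 1 and
// NumNone = 2, Likely = INT32_MAX / 3 = 715827882 and None = Likely / 3 =
// 238609294; even if the optimizer merges both neutral cases, their
// combined weight (477218588) stays below a single Likely weight.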
SmallVector<uint64_t, 16> Result;
Result.reserve(Likelihoods.size());
for (const auto LH : Likelihoods) {
switch (LH) {
case Stmt::LH_Unlikely:
Result.push_back(Unlikely);
break;
case Stmt::LH_None:
Result.push_back(None);
break;
case Stmt::LH_Likely:
Result.push_back(Likely);
break;
}
}
return Result;
}
void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// Handle nested switch statements.
llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights;
SmallVector<Stmt::Likelihood, 16> *SavedSwitchLikelihood = SwitchLikelihood;
llvm::BasicBlock *SavedCRBlock = CaseRangeBlock;
// See if we can constant fold the condition of the switch and therefore only
// emit the live case statement (if any) of the switch.
llvm::APSInt ConstantCondValue;
if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) {
SmallVector<const Stmt*, 4> CaseStmts;
const SwitchCase *Case = nullptr;
if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts,
getContext(), Case)) {
if (Case)
incrementProfileCounter(Case);
RunCleanupsScope ExecutedScope(*this);
if (S.getInit())
EmitStmt(S.getInit());
// Emit the condition variable if needed inside the entire cleanup scope
// used by this special case for constant folded switches.
if (S.getConditionVariable())
EmitDecl(*S.getConditionVariable());
// At this point, we are no longer "within" a switch instance, so
// we can temporarily enforce this to ensure that any embedded case
// statements are not emitted.
SwitchInsn = nullptr;
// Okay, we can dead code eliminate everything except this case. Emit the
// specified series of statements and we're good.
for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i)
EmitStmt(CaseStmts[i]);
incrementProfileCounter(&S);
// Now we want to restore the saved switch instance so that nested
// switches continue to function properly.
SwitchInsn = SavedSwitchInsn;
return;
}
}
JumpDest SwitchExit = getJumpDestInCurrentScope("sw.epilog");
RunCleanupsScope ConditionScope(*this);
if (S.getInit())
EmitStmt(S.getInit());
if (S.getConditionVariable())
EmitDecl(*S.getConditionVariable());
llvm::Value *CondV = EmitScalarExpr(S.getCond());
// Create basic block to hold stuff that comes after switch
// statement. We also need to create a default block now so that
// explicit case ranges tests can have a place to jump to on
// failure.
llvm::BasicBlock *DefaultBlock = createBasicBlock("sw.default");
SwitchInsn = Builder.CreateSwitch(CondV, DefaultBlock);
if (PGO.haveRegionCounts()) {
// Walk the SwitchCase list to find how many there are.
uint64_t DefaultCount = 0;
unsigned NumCases = 0;
for (const SwitchCase *Case = S.getSwitchCaseList();
Case;
Case = Case->getNextSwitchCase()) {
if (isa<DefaultStmt>(Case))
DefaultCount = getProfileCount(Case);
NumCases += 1;
}
SwitchWeights = new SmallVector<uint64_t, 16>();
SwitchWeights->reserve(NumCases);
// The default needs to be first. We store the edge count, so we already
// know the right weight.
SwitchWeights->push_back(DefaultCount);
} else if (CGM.getCodeGenOpts().OptimizationLevel) {
SwitchLikelihood = new SmallVector<Stmt::Likelihood, 16>();
// Initialize the default case.
SwitchLikelihood->push_back(Stmt::LH_None);
}
CaseRangeBlock = DefaultBlock;
// Clear the insertion point to indicate we are in unreachable code.
Builder.ClearInsertionPoint();
// All break statements jump to the switch exit block. If BreakContinueStack
// is non-empty then reuse the last ContinueBlock.
JumpDest OuterContinue;
if (!BreakContinueStack.empty())
OuterContinue = BreakContinueStack.back().ContinueBlock;
BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue));
// Emit switch body.
EmitStmt(S.getBody());
BreakContinueStack.pop_back();
// Update the default block in case explicit case range tests have
// been chained on top.
SwitchInsn->setDefaultDest(CaseRangeBlock);
// If a default was never emitted:
if (!DefaultBlock->getParent()) {
// If we have cleanups, emit the default block so that there's a
// place to jump through the cleanups from.
if (ConditionScope.requiresCleanups()) {
EmitBlock(DefaultBlock);
// Otherwise, just forward the default block to the switch end.
} else {
DefaultBlock->replaceAllUsesWith(SwitchExit.getBlock());
delete DefaultBlock;
}
}
ConditionScope.ForceCleanup();
// Emit continuation.
EmitBlock(SwitchExit.getBlock(), true);
incrementProfileCounter(&S);
// If the switch has a condition wrapped by __builtin_unpredictable,
// create metadata that specifies that the switch is unpredictable.
// Don't bother if not optimizing because that metadata would not be used.
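// For illustration (hypothetical source):
//   switch (__builtin_unpredictable(x)) { ... }
// results in "!unpredictable" metadata on the emitted switch instruction.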
auto *Call = dyn_cast<CallExpr>(S.getCond());
if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) {
auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl());
if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
llvm::MDBuilder MDHelper(getLLVMContext());
SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable,
MDHelper.createUnpredictable());
}
}
if (SwitchWeights) {
assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() &&
"switch weights do not match switch cases");
// If there's only one jump destination there's no sense weighting it.
if (SwitchWeights->size() > 1)
SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
createProfileWeights(*SwitchWeights));
delete SwitchWeights;
} else if (SwitchLikelihood) {
assert(SwitchLikelihood->size() == 1 + SwitchInsn->getNumCases() &&
"switch likelihoods do not match switch cases");
std::optional<SmallVector<uint64_t, 16>> LHW =
getLikelihoodWeights(*SwitchLikelihood);
if (LHW) {
llvm::MDBuilder MDHelper(CGM.getLLVMContext());
SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
createProfileWeights(*LHW));
}
delete SwitchLikelihood;
}
SwitchInsn = SavedSwitchInsn;
SwitchWeights = SavedSwitchWeights;
SwitchLikelihood = SavedSwitchLikelihood;
CaseRangeBlock = SavedCRBlock;
}
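/// For illustration (hypothetical input): SimplifyConstraint below turns the
/// multi-alternative constraint "=&r,g" into roughly "&r|imr" -- '=' is
/// dropped, '&' is kept, ',' becomes '|', and 'g' expands to "imr"; the
/// exact register letters come from the target's convertConstraint.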
static std::string
SimplifyConstraint(const char *Constraint, const TargetInfo &Target,
SmallVectorImpl<TargetInfo::ConstraintInfo> *OutCons=nullptr) {
std::string Result;
while (*Constraint) {
switch (*Constraint) {
default:
Result += Target.convertConstraint(Constraint);
break;
// Ignore these
case '*':
case '?':
case '!':
case '=': // Will see this and the following in multi-alt constraints.
case '+':
break;
case '#': // Ignore the rest of the constraint alternative.
while (Constraint[1] && Constraint[1] != ',')
Constraint++;
break;
case '&':
case '%':
Result += *Constraint;
while (Constraint[1] && Constraint[1] == *Constraint)
Constraint++;
break;
case ',':
Result += "|";
break;
case 'g':
Result += "imr";
break;
case '[': {
assert(OutCons &&
"Must pass output names to constraints with a symbolic name");
unsigned Index;
bool result = Target.resolveSymbolicName(Constraint, *OutCons, Index);
assert(result && "Could not resolve symbolic name"); (void)result;
Result += llvm::utostr(Index);
break;
}
}
Constraint++;
}
return Result;
}
/// AddVariableConstraints - Look at AsmExpr and if it is a variable declared
/// as using a particular register add that as a constraint that will be used
/// in this asm stmt.
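/// For illustration (hypothetical source):
///   register int cnt asm("r14");
///   asm("..." : "+r"(cnt));
/// rewrites the plain register constraint into "{r14}", or "&{r14}" when the
/// operand is earlyclobber.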
static std::string
AddVariableConstraints(const std::string &Constraint, const Expr &AsmExpr,
const TargetInfo &Target, CodeGenModule &CGM,
const AsmStmt &Stmt, const bool EarlyClobber,
std::string *GCCReg = nullptr) {
const DeclRefExpr *AsmDeclRef = dyn_cast<DeclRefExpr>(&AsmExpr);
if (!AsmDeclRef)
return Constraint;
const ValueDecl &Value = *AsmDeclRef->getDecl();
const VarDecl *Variable = dyn_cast<VarDecl>(&Value);
if (!Variable)
return Constraint;
if (Variable->getStorageClass() != SC_Register)
return Constraint;
AsmLabelAttr *Attr = Variable->getAttr<AsmLabelAttr>();
if (!Attr)
return Constraint;
StringRef Register = Attr->getLabel();
assert(Target.isValidGCCRegisterName(Register));
// We're using validateOutputConstraint here because we only care if
// this is a register constraint.
TargetInfo::ConstraintInfo Info(Constraint, "");
if (Target.validateOutputConstraint(Info) &&
!Info.allowsRegister()) {
CGM.ErrorUnsupported(&Stmt, "__asm__");
return Constraint;
}
// Canonicalize the register here before returning it.
Register = Target.getNormalizedGCCRegisterName(Register);
if (GCCReg != nullptr)
*GCCReg = Register.str();
return (EarlyClobber ? "&{" : "{") + Register.str() + "}";
}
std::pair<llvm::Value*, llvm::Type *> CodeGenFunction::EmitAsmInputLValue(
const TargetInfo::ConstraintInfo &Info, LValue InputValue,
QualType InputType, std::string &ConstraintStr, SourceLocation Loc) {
if (Info.allowsRegister() || !Info.allowsMemory()) {
if (CodeGenFunction::hasScalarEvaluationKind(InputType))
return {EmitLoadOfLValue(InputValue, Loc).getScalarVal(), nullptr};
llvm::Type *Ty = ConvertType(InputType);
uint64_t Size = CGM.getDataLayout().getTypeSizeInBits(Ty);
if ((Size <= 64 && llvm::isPowerOf2_64(Size)) ||
getTargetHooks().isScalarizableAsmOperand(*this, Ty)) {
Ty = llvm::IntegerType::get(getLLVMContext(), Size);
return {Builder.CreateLoad(InputValue.getAddress().withElementType(Ty)),
nullptr};
}
}
Address Addr = InputValue.getAddress();
ConstraintStr += '*';
return {InputValue.getPointer(*this), Addr.getElementType()};
}
std::pair<llvm::Value *, llvm::Type *>
CodeGenFunction::EmitAsmInput(const TargetInfo::ConstraintInfo &Info,
const Expr *InputExpr,
std::string &ConstraintStr) {
// If this can't be a register or memory, i.e., has to be a constant
// (immediate or symbolic), try to emit it as such.
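// For illustration (hypothetical source): an immediate-only operand such as
//   asm volatile ("int %0" :: "n"(0x80));
// is folded to a ConstantInt here instead of being loaded into a register.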
if (!Info.allowsRegister() && !Info.allowsMemory()) {
if (Info.requiresImmediateConstant()) {
Expr::EvalResult EVResult;
InputExpr->EvaluateAsRValue(EVResult, getContext(), true);
llvm::APSInt IntResult;
if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(),
getContext()))
return {llvm::ConstantInt::get(getLLVMContext(), IntResult), nullptr};
}
Expr::EvalResult Result;
if (InputExpr->EvaluateAsInt(Result, getContext()))
return {llvm::ConstantInt::get(getLLVMContext(), Result.Val.getInt()),
nullptr};
}
if (Info.allowsRegister() || !Info.allowsMemory())
if (CodeGenFunction::hasScalarEvaluationKind(InputExpr->getType()))
return {EmitScalarExpr(InputExpr), nullptr};
if (InputExpr->getStmtClass() == Expr::CXXThisExprClass)
return {EmitScalarExpr(InputExpr), nullptr};
InputExpr = InputExpr->IgnoreParenNoopCasts(getContext());
LValue Dest = EmitLValue(InputExpr);
return EmitAsmInputLValue(Info, Dest, InputExpr->getType(), ConstraintStr,
InputExpr->getExprLoc());
}
/// getAsmSrcLocInfo - Return the !srcloc metadata node to attach to an inline
/// asm call instruction. The !srcloc MDNode contains a list of constant
/// integers which are the source locations of the start of each line in the
/// asm.
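/// For illustration (hypothetical input): for the two-line string
/// "movl %eax, %ebx\n\tcpuid" the node holds two integers -- the location of
/// the string's start and the location of the byte after the '\n' -- letting
/// the backend point diagnostics at the right source line.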
static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
CodeGenFunction &CGF) {
SmallVector<llvm::Metadata *, 8> Locs;
// Add the location of the first line to the MDNode.
Locs.push_back(llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
CGF.Int64Ty, Str->getBeginLoc().getRawEncoding())));
StringRef StrVal = Str->getString();
if (!StrVal.empty()) {
const SourceManager &SM = CGF.CGM.getContext().getSourceManager();
const LangOptions &LangOpts = CGF.CGM.getLangOpts();
unsigned StartToken = 0;
unsigned ByteOffset = 0;
// Add the location of the start of each subsequent line of the asm to the
// MDNode.
for (unsigned i = 0, e = StrVal.size() - 1; i != e; ++i) {
if (StrVal[i] != '\n') continue;
SourceLocation LineLoc = Str->getLocationOfByte(
i + 1, SM, LangOpts, CGF.getTarget(), &StartToken, &ByteOffset);
Locs.push_back(llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(CGF.Int64Ty, LineLoc.getRawEncoding())));
}
}
return llvm::MDNode::get(CGF.getLLVMContext(), Locs);
}
static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
bool HasUnwindClobber, bool ReadOnly,
bool ReadNone, bool NoMerge, const AsmStmt &S,
const std::vector<llvm::Type *> &ResultRegTypes,
const std::vector<llvm::Type *> &ArgElemTypes,
CodeGenFunction &CGF,
std::vector<llvm::Value *> &RegResults) {
if (!HasUnwindClobber)
Result.addFnAttr(llvm::Attribute::NoUnwind);
if (NoMerge)
Result.addFnAttr(llvm::Attribute::NoMerge);
// Attach readnone and readonly attributes.
if (!HasSideEffect) {
if (ReadNone)
Result.setDoesNotAccessMemory();
else if (ReadOnly)
Result.setOnlyReadsMemory();
}
// Add elementtype attribute for indirect constraints.
for (auto Pair : llvm::enumerate(ArgElemTypes)) {
if (Pair.value()) {
auto Attr = llvm::Attribute::get(
CGF.getLLVMContext(), llvm::Attribute::ElementType, Pair.value());
Result.addParamAttr(Pair.index(), Attr);
}
}
// Slap the source location of the inline asm into a !srcloc metadata on the
// call.
if (const auto *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S))
Result.setMetadata("srcloc",
getAsmSrcLocInfo(gccAsmStmt->getAsmString(), CGF));
else {
// At least put the line number on MS inline asm blobs.
llvm::Constant *Loc =
llvm::ConstantInt::get(CGF.Int64Ty, S.getAsmLoc().getRawEncoding());
Result.setMetadata("srcloc",
llvm::MDNode::get(CGF.getLLVMContext(),
llvm::ConstantAsMetadata::get(Loc)));
}
if (CGF.getLangOpts().assumeFunctionsAreConvergent())
// Conservatively, mark all inline asm blocks in CUDA or OpenCL as
// convergent (meaning, they may call an intrinsically convergent op, such
// as bar.sync, and so can't have certain optimizations applied around
// them).
Result.addFnAttr(llvm::Attribute::Convergent);
// Extract all of the register value results from the asm.
if (ResultRegTypes.size() == 1) {
RegResults.push_back(&Result);
} else {
for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {
llvm::Value *Tmp = CGF.Builder.CreateExtractValue(&Result, i, "asmresult");
RegResults.push_back(Tmp);
}
}
}
static void
EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
const llvm::ArrayRef<llvm::Value *> RegResults,
const llvm::ArrayRef<llvm::Type *> ResultRegTypes,
const llvm::ArrayRef<llvm::Type *> ResultTruncRegTypes,
const llvm::ArrayRef<LValue> ResultRegDests,
const llvm::ArrayRef<QualType> ResultRegQualTys,
const llvm::BitVector &ResultTypeRequiresCast,
const llvm::BitVector &ResultRegIsFlagReg) {
CGBuilderTy &Builder = CGF.Builder;
CodeGenModule &CGM = CGF.CGM;
llvm::LLVMContext &CTX = CGF.getLLVMContext();
assert(RegResults.size() == ResultRegTypes.size());
assert(RegResults.size() == ResultTruncRegTypes.size());
assert(RegResults.size() == ResultRegDests.size());
// ResultRegDests can also be populated by addReturnRegisterOutputs() above,
// in which case its size may grow.
assert(ResultTypeRequiresCast.size() <= ResultRegDests.size());
assert(ResultRegIsFlagReg.size() <= ResultRegDests.size());
for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
llvm::Value *Tmp = RegResults[i];
llvm::Type *TruncTy = ResultTruncRegTypes[i];
if ((i < ResultRegIsFlagReg.size()) && ResultRegIsFlagReg[i]) {
// Target must guarantee the Value `Tmp` here is lowered to a boolean
// value.
llvm::Constant *Two = llvm::ConstantInt::get(Tmp->getType(), 2);
llvm::Value *IsBooleanValue =
Builder.CreateCmp(llvm::CmpInst::ICMP_ULT, Tmp, Two);
llvm::Function *FnAssume = CGM.getIntrinsic(llvm::Intrinsic::assume);
Builder.CreateCall(FnAssume, IsBooleanValue);
}
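// For illustration, the instructions created above look roughly like:
//   %isbool = icmp ult i8 %flag, 2
//   call void @llvm.assume(i1 %isbool)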
// If the result type of the LLVM IR asm doesn't match the result type of
// the expression, do the conversion.
if (ResultRegTypes[i] != TruncTy) {
// Truncate the integer result to the right size; note that TruncTy can be
// a pointer.
if (TruncTy->isFloatingPointTy())
Tmp = Builder.CreateFPTrunc(Tmp, TruncTy);
else if (TruncTy->isPointerTy() && Tmp->getType()->isIntegerTy()) {
uint64_t ResSize = CGM.getDataLayout().getTypeSizeInBits(TruncTy);
Tmp = Builder.CreateTrunc(
Tmp, llvm::IntegerType::get(CTX, (unsigned)ResSize));
Tmp = Builder.CreateIntToPtr(Tmp, TruncTy);
} else if (Tmp->getType()->isPointerTy() && TruncTy->isIntegerTy()) {
uint64_t TmpSize =
CGM.getDataLayout().getTypeSizeInBits(Tmp->getType());
Tmp = Builder.CreatePtrToInt(
Tmp, llvm::IntegerType::get(CTX, (unsigned)TmpSize));
Tmp = Builder.CreateTrunc(Tmp, TruncTy);
} else if (Tmp->getType()->isIntegerTy() && TruncTy->isIntegerTy()) {
Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
} else if (Tmp->getType()->isVectorTy() || TruncTy->isVectorTy()) {
Tmp = Builder.CreateBitCast(Tmp, TruncTy);
}
}
LValue Dest = ResultRegDests[i];
// ResultTypeRequiresCast elements correspond to the first
// ResultTypeRequiresCast.size() elements of RegResults.
if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
Address A = Dest.getAddress().withElementType(ResultRegTypes[i]);
if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
Builder.CreateStore(Tmp, A);
continue;
}
QualType Ty =
CGF.getContext().getIntTypeForBitwidth(Size, /*Signed=*/false);
if (Ty.isNull()) {
const Expr *OutExpr = S.getOutputExpr(i);
CGM.getDiags().Report(OutExpr->getExprLoc(),
diag::err_store_value_to_reg);
return;
}
Dest = CGF.MakeAddrLValue(A, Ty);
}
CGF.EmitStoreThroughLValue(RValue::get(Tmp), Dest);
}
}
static void EmitHipStdParUnsupportedAsm(CodeGenFunction *CGF,
const AsmStmt &S) {
constexpr auto Name = "__ASM__hipstdpar_unsupported";
StringRef Asm;
if (auto GCCAsm = dyn_cast<GCCAsmStmt>(&S))
Asm = GCCAsm->getAsmString()->getString();
auto &Ctx = CGF->CGM.getLLVMContext();
auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm);
auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
{StrTy->getType()}, false);
auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
CGF->Builder.CreateCall(UBF, {StrTy});
}
void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Pop all cleanup blocks at the end of the asm statement.
CodeGenFunction::RunCleanupsScope Cleanups(*this);
// Assemble the final asm string.
std::string AsmString = S.generateAsmString(getContext());
// Get all the output and input constraints together.
SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
SmallVector<TargetInfo::ConstraintInfo, 4> InputConstraintInfos;
bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
bool IsValidTargetAsm = true;
for (unsigned i = 0, e = S.getNumOutputs(); i != e && IsValidTargetAsm; i++) {
StringRef Name;
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
Name = GAS->getOutputName(i);
TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name);
bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid;
if (IsHipStdPar && !IsValid)
IsValidTargetAsm = false;
else
assert(IsValid && "Failed to parse output constraint");
OutputConstraintInfos.push_back(Info);
}
for (unsigned i = 0, e = S.getNumInputs(); i != e && IsValidTargetAsm; i++) {
StringRef Name;
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
Name = GAS->getInputName(i);
TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name);
bool IsValid =
getTarget().validateInputConstraint(OutputConstraintInfos, Info);
if (IsHipStdPar && !IsValid)
IsValidTargetAsm = false;
else
assert(IsValid && "Failed to parse input constraint");
InputConstraintInfos.push_back(Info);
}
if (!IsValidTargetAsm)
return EmitHipStdParUnsupportedAsm(this, S);
std::string Constraints;
std::vector<LValue> ResultRegDests;
std::vector<QualType> ResultRegQualTys;
std::vector<llvm::Type *> ResultRegTypes;
std::vector<llvm::Type *> ResultTruncRegTypes;
std::vector<llvm::Type *> ArgTypes;
std::vector<llvm::Type *> ArgElemTypes;
std::vector<llvm::Value*> Args;
llvm::BitVector ResultTypeRequiresCast;
llvm::BitVector ResultRegIsFlagReg;
// Keep track of inout constraints.
std::string InOutConstraints;
std::vector<llvm::Value*> InOutArgs;
std::vector<llvm::Type*> InOutArgTypes;
std::vector<llvm::Type*> InOutArgElemTypes;
// Keep track of out constraints for tied input operand.
std::vector<std::string> OutputConstraints;
// Keep track of defined physregs.
llvm::SmallSet<std::string, 8> PhysRegOutputs;
// An inline asm can be marked readonly if it meets the following conditions:
// - it doesn't have any side effects
// - it doesn't clobber memory
// - it doesn't return a value by-reference
// It can be marked readnone if it doesn't have any input memory constraints
// in addition to meeting the conditions listed above.
bool ReadOnly = true, ReadNone = true;
for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) {
TargetInfo::ConstraintInfo &Info = OutputConstraintInfos[i];
// Simplify the output constraint.
std::string OutputConstraint(S.getOutputConstraint(i));
OutputConstraint = SimplifyConstraint(OutputConstraint.c_str() + 1,
getTarget(), &OutputConstraintInfos);
const Expr *OutExpr = S.getOutputExpr(i);
OutExpr = OutExpr->IgnoreParenNoopCasts(getContext());
std::string GCCReg;
OutputConstraint = AddVariableConstraints(OutputConstraint, *OutExpr,
getTarget(), CGM, S,
Info.earlyClobber(),
&GCCReg);
// Give an error on multiple outputs to same physreg.
if (!GCCReg.empty() && !PhysRegOutputs.insert(GCCReg).second)
CGM.Error(S.getAsmLoc(), "multiple outputs to hard register: " + GCCReg);
OutputConstraints.push_back(OutputConstraint);
LValue Dest = EmitLValue(OutExpr);
if (!Constraints.empty())
Constraints += ',';
// If this is a register output, then make the inline asm return it
// by-value. If this is a memory result, return the value by-reference.
QualType QTy = OutExpr->getType();
const bool IsScalarOrAggregate = hasScalarEvaluationKind(QTy) ||
hasAggregateEvaluationKind(QTy);
if (!Info.allowsMemory() && IsScalarOrAggregate) {
Constraints += "=" + OutputConstraint;
ResultRegQualTys.push_back(QTy);
ResultRegDests.push_back(Dest);
bool IsFlagReg = llvm::StringRef(OutputConstraint).starts_with("{@cc");
ResultRegIsFlagReg.push_back(IsFlagReg);
llvm::Type *Ty = ConvertTypeForMem(QTy);
const bool RequiresCast = Info.allowsRegister() &&
(getTargetHooks().isScalarizableAsmOperand(*this, Ty) ||
Ty->isAggregateType());
ResultTruncRegTypes.push_back(Ty);
ResultTypeRequiresCast.push_back(RequiresCast);
if (RequiresCast) {
unsigned Size = getContext().getTypeSize(QTy);
Ty = llvm::IntegerType::get(getLLVMContext(), Size);
}
ResultRegTypes.push_back(Ty);
// If this output is tied to an input, and if the input is larger, then
// we need to set the actual result type of the inline asm node to be the
// same as the input type.
if (Info.hasMatchingInput()) {
unsigned InputNo;
for (InputNo = 0; InputNo != S.getNumInputs(); ++InputNo) {
TargetInfo::ConstraintInfo &Input = InputConstraintInfos[InputNo];
if (Input.hasTiedOperand() && Input.getTiedOperand() == i)
break;
}
assert(InputNo != S.getNumInputs() && "Didn't find matching input!");
QualType InputTy = S.getInputExpr(InputNo)->getType();
QualType OutputType = OutExpr->getType();
uint64_t InputSize = getContext().getTypeSize(InputTy);
if (getContext().getTypeSize(OutputType) < InputSize) {
// Form the asm to return the value as a larger integer or fp type.
ResultRegTypes.back() = ConvertType(InputTy);
}
}
if (llvm::Type* AdjTy =
getTargetHooks().adjustInlineAsmType(*this, OutputConstraint,
ResultRegTypes.back()))
ResultRegTypes.back() = AdjTy;
else {
CGM.getDiags().Report(S.getAsmLoc(),
diag::err_asm_invalid_type_in_input)
<< OutExpr->getType() << OutputConstraint;
}
// Update largest vector width for any vector types.
if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back()))
LargestVectorWidth =
std::max((uint64_t)LargestVectorWidth,
VT->getPrimitiveSizeInBits().getKnownMinValue());
} else {
Address DestAddr = Dest.getAddress();
// Matrix types in memory are represented by arrays, but accessed through
// vector pointers, with the alignment specified on the access operation.
// For inline assembly, update pointer arguments to use vector pointers.
// Otherwise there will be a mismatch if the matrix is also an
// input argument, which is represented as a vector.
if (isa<MatrixType>(OutExpr->getType().getCanonicalType()))
DestAddr = DestAddr.withElementType(ConvertType(OutExpr->getType()));
ArgTypes.push_back(DestAddr.getType());
ArgElemTypes.push_back(DestAddr.getElementType());
Args.push_back(DestAddr.emitRawPointer(*this));
Constraints += "=*";
Constraints += OutputConstraint;
ReadOnly = ReadNone = false;
}
if (Info.isReadWrite()) {
InOutConstraints += ',';
const Expr *InputExpr = S.getOutputExpr(i);
llvm::Value *Arg;
llvm::Type *ArgElemType;
std::tie(Arg, ArgElemType) = EmitAsmInputLValue(
Info, Dest, InputExpr->getType(), InOutConstraints,
InputExpr->getExprLoc());
if (llvm::Type* AdjTy =
getTargetHooks().adjustInlineAsmType(*this, OutputConstraint,
Arg->getType()))
Arg = Builder.CreateBitCast(Arg, AdjTy);
// Update largest vector width for any vector types.
if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
LargestVectorWidth =
std::max((uint64_t)LargestVectorWidth,
VT->getPrimitiveSizeInBits().getKnownMinValue());
// Only tie earlyclobber physregs.
if (Info.allowsRegister() && (GCCReg.empty() || Info.earlyClobber()))
InOutConstraints += llvm::utostr(i);
else
InOutConstraints += OutputConstraint;
InOutArgTypes.push_back(Arg->getType());
InOutArgElemTypes.push_back(ArgElemType);
InOutArgs.push_back(Arg);
}
}
// If this is a Microsoft-style asm blob, store the return registers (EAX:EDX)
// to the return value slot. Only do this when returning in registers.
if (isa<MSAsmStmt>(&S)) {
const ABIArgInfo &RetAI = CurFnInfo->getReturnInfo();
if (RetAI.isDirect() || RetAI.isExtend()) {
// Make a fake lvalue for the return value slot.
LValue ReturnSlot = MakeAddrLValueWithoutTBAA(ReturnValue, FnRetTy);
CGM.getTargetCodeGenInfo().addReturnRegisterOutputs(
*this, ReturnSlot, Constraints, ResultRegTypes, ResultTruncRegTypes,
ResultRegDests, AsmString, S.getNumOutputs());
SawAsmBlock = true;
}
}
for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
const Expr *InputExpr = S.getInputExpr(i);
TargetInfo::ConstraintInfo &Info = InputConstraintInfos[i];
if (Info.allowsMemory())
ReadNone = false;
if (!Constraints.empty())
Constraints += ',';
// Simplify the input constraint.
std::string InputConstraint(S.getInputConstraint(i));
InputConstraint = SimplifyConstraint(InputConstraint.c_str(), getTarget(),
&OutputConstraintInfos);
InputConstraint = AddVariableConstraints(
InputConstraint, *InputExpr->IgnoreParenNoopCasts(getContext()),
getTarget(), CGM, S, false /* No EarlyClobber */);
std::string ReplaceConstraint(InputConstraint);
llvm::Value *Arg;
llvm::Type *ArgElemType;
std::tie(Arg, ArgElemType) = EmitAsmInput(Info, InputExpr, Constraints);
// If this input argument is tied to a larger output result, extend the
// input to be the same size as the output. The LLVM backend wants to see
// the input and output of a matching constraint be the same size. Note
// that GCC does not define what the top bits are here. We use zext because
// that is usually cheaper, but LLVM IR should really get an anyext someday.
if (Info.hasTiedOperand()) {
unsigned Output = Info.getTiedOperand();
QualType OutputType = S.getOutputExpr(Output)->getType();
QualType InputTy = InputExpr->getType();
if (getContext().getTypeSize(OutputType) >
getContext().getTypeSize(InputTy)) {
// Use ptrtoint as appropriate so that we can do our extension.
if (isa<llvm::PointerType>(Arg->getType()))
Arg = Builder.CreatePtrToInt(Arg, IntPtrTy);
llvm::Type *OutputTy = ConvertType(OutputType);
if (isa<llvm::IntegerType>(OutputTy))
Arg = Builder.CreateZExt(Arg, OutputTy);
else if (isa<llvm::PointerType>(OutputTy))
Arg = Builder.CreateZExt(Arg, IntPtrTy);
else if (OutputTy->isFloatingPointTy())
Arg = Builder.CreateFPExt(Arg, OutputTy);
}
// Deal with the tied operands' constraint code in adjustInlineAsmType.
ReplaceConstraint = OutputConstraints[Output];
}
if (llvm::Type* AdjTy =
getTargetHooks().adjustInlineAsmType(*this, ReplaceConstraint,
Arg->getType()))
Arg = Builder.CreateBitCast(Arg, AdjTy);
else
CGM.getDiags().Report(S.getAsmLoc(), diag::err_asm_invalid_type_in_input)
<< InputExpr->getType() << InputConstraint;
// Update largest vector width for any vector types.
if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
LargestVectorWidth =
std::max((uint64_t)LargestVectorWidth,
VT->getPrimitiveSizeInBits().getKnownMinValue());
ArgTypes.push_back(Arg->getType());
ArgElemTypes.push_back(ArgElemType);
Args.push_back(Arg);
Constraints += InputConstraint;
}
// Append the "input" part of inout constraints.
for (unsigned i = 0, e = InOutArgs.size(); i != e; i++) {
ArgTypes.push_back(InOutArgTypes[i]);
ArgElemTypes.push_back(InOutArgElemTypes[i]);
Args.push_back(InOutArgs[i]);
}
Constraints += InOutConstraints;
// Labels
SmallVector<llvm::BasicBlock *, 16> Transfer;
llvm::BasicBlock *Fallthrough = nullptr;
bool IsGCCAsmGoto = false;
if (const auto *GS = dyn_cast<GCCAsmStmt>(&S)) {
IsGCCAsmGoto = GS->isAsmGoto();
if (IsGCCAsmGoto) {
for (const auto *E : GS->labels()) {
JumpDest Dest = getJumpDestForLabel(E->getLabel());
Transfer.push_back(Dest.getBlock());
if (!Constraints.empty())
Constraints += ',';
Constraints += "!i";
}
Fallthrough = createBasicBlock("asm.fallthrough");
}
}
bool HasUnwindClobber = false;
// Clobbers
for (unsigned i = 0, e = S.getNumClobbers(); i != e; i++) {
StringRef Clobber = S.getClobber(i);
if (Clobber == "memory")
ReadOnly = ReadNone = false;
else if (Clobber == "unwind") {
HasUnwindClobber = true;
continue;
} else if (Clobber != "cc") {
Clobber = getTarget().getNormalizedGCCRegisterName(Clobber);
if (CGM.getCodeGenOpts().StackClashProtector &&
getTarget().isSPRegName(Clobber)) {
CGM.getDiags().Report(S.getAsmLoc(),
diag::warn_stack_clash_protection_inline_asm);
}
}
if (isa<MSAsmStmt>(&S)) {
if (Clobber == "eax" || Clobber == "edx") {
if (Constraints.find("=&A") != std::string::npos)
continue;
std::string::size_type position1 =
Constraints.find("={" + Clobber.str() + "}");
if (position1 != std::string::npos) {
Constraints.insert(position1 + 1, "&");
continue;
}
std::string::size_type position2 = Constraints.find("=A");
if (position2 != std::string::npos) {
Constraints.insert(position2 + 1, "&");
continue;
}
}
}
if (!Constraints.empty())
Constraints += ',';
Constraints += "~{";
Constraints += Clobber;
Constraints += '}';
}
assert(!(HasUnwindClobber && IsGCCAsmGoto) &&
"unwind clobber can't be used with asm goto");
// Add machine specific clobbers
std::string_view MachineClobbers = getTarget().getClobbers();
if (!MachineClobbers.empty()) {
if (!Constraints.empty())
Constraints += ',';
Constraints += MachineClobbers;
}
llvm::Type *ResultType;
if (ResultRegTypes.empty())
ResultType = VoidTy;
else if (ResultRegTypes.size() == 1)
ResultType = ResultRegTypes[0];
else
ResultType = llvm::StructType::get(getLLVMContext(), ResultRegTypes);
llvm::FunctionType *FTy =
llvm::FunctionType::get(ResultType, ArgTypes, false);
bool HasSideEffect = S.isVolatile() || S.getNumOutputs() == 0;
llvm::InlineAsm::AsmDialect GnuAsmDialect =
CGM.getCodeGenOpts().getInlineAsmDialect() == CodeGenOptions::IAD_ATT
? llvm::InlineAsm::AD_ATT
: llvm::InlineAsm::AD_Intel;
llvm::InlineAsm::AsmDialect AsmDialect = isa<MSAsmStmt>(&S) ?
llvm::InlineAsm::AD_Intel : GnuAsmDialect;
llvm::InlineAsm *IA = llvm::InlineAsm::get(
FTy, AsmString, Constraints, HasSideEffect,
/* IsAlignStack */ false, AsmDialect, HasUnwindClobber);
std::vector<llvm::Value*> RegResults;
llvm::CallBrInst *CBR;
llvm::DenseMap<llvm::BasicBlock *, SmallVector<llvm::Value *, 4>>
CBRRegResults;
if (IsGCCAsmGoto) {
CBR = Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
EmitBlock(Fallthrough);
UpdateAsmCallInst(*CBR, HasSideEffect, false, ReadOnly, ReadNone,
InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
*this, RegResults);
// Because we are emitting code top to bottom, we don't have enough
// information at this point to know precisely whether we have a critical
// edge. If we have outputs, split all indirect destinations.
if (!RegResults.empty()) {
unsigned i = 0;
for (llvm::BasicBlock *Dest : CBR->getIndirectDests()) {
llvm::Twine SynthName = Dest->getName() + ".split";
llvm::BasicBlock *SynthBB = createBasicBlock(SynthName);
llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
Builder.SetInsertPoint(SynthBB);
if (ResultRegTypes.size() == 1) {
CBRRegResults[SynthBB].push_back(CBR);
} else {
for (unsigned j = 0, e = ResultRegTypes.size(); j != e; ++j) {
llvm::Value *Tmp = Builder.CreateExtractValue(CBR, j, "asmresult");
CBRRegResults[SynthBB].push_back(Tmp);
}
}
EmitBranch(Dest);
EmitBlock(SynthBB);
CBR->setIndirectDest(i++, SynthBB);
}
}
} else if (HasUnwindClobber) {
llvm::CallBase *Result = EmitCallOrInvoke(IA, Args, "");
UpdateAsmCallInst(*Result, HasSideEffect, true, ReadOnly, ReadNone,
InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
*this, RegResults);
} else {
llvm::CallInst *Result =
Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
UpdateAsmCallInst(*Result, HasSideEffect, false, ReadOnly, ReadNone,
InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
*this, RegResults);
}
EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes,
ResultRegDests, ResultRegQualTys, ResultTypeRequiresCast,
ResultRegIsFlagReg);
// If this is an asm goto with outputs, repeat EmitAsmStores, but with a
// different insertion point; one for each indirect destination and with
// CBRRegResults rather than RegResults.
if (IsGCCAsmGoto && !CBRRegResults.empty()) {
for (llvm::BasicBlock *Succ : CBR->getIndirectDests()) {
llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
Builder.SetInsertPoint(Succ, --(Succ->end()));
EmitAsmStores(*this, S, CBRRegResults[Succ], ResultRegTypes,
ResultTruncRegTypes, ResultRegDests, ResultRegQualTys,
ResultTypeRequiresCast, ResultRegIsFlagReg);
}
}
}
LValue CodeGenFunction::InitCapturedStruct(const CapturedStmt &S) {
const RecordDecl *RD = S.getCapturedRecordDecl();
QualType RecordTy = getContext().getRecordType(RD);
// Initialize the captured struct.
LValue SlotLV =
MakeAddrLValue(CreateMemTemp(RecordTy, "agg.captured"), RecordTy);
RecordDecl::field_iterator CurField = RD->field_begin();
for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
E = S.capture_init_end();
I != E; ++I, ++CurField) {
LValue LV = EmitLValueForFieldInitialization(SlotLV, *CurField);
if (CurField->hasCapturedVLAType()) {
EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV);
} else {
EmitInitializerForField(*CurField, LV, *I);
}
}
return SlotLV;
}
/// Generate an outlined function for the body of a CapturedStmt, store any
/// captured variables into the captured struct, and call the outlined function.
llvm::Function *
CodeGenFunction::EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K) {
LValue CapStruct = InitCapturedStruct(S);
// Emit the CapturedDecl
CodeGenFunction CGF(CGM, true);
CGCapturedStmtRAII CapInfoRAII(CGF, new CGCapturedStmtInfo(S, K));
llvm::Function *F = CGF.GenerateCapturedStmtFunction(S);
delete CGF.CapturedStmtInfo;
// Emit call to the helper function.
EmitCallOrInvoke(F, CapStruct.getPointer(*this));
return F;
}
Address CodeGenFunction::GenerateCapturedStmtArgument(const CapturedStmt &S) {
LValue CapStruct = InitCapturedStruct(S);
return CapStruct.getAddress();
}
/// Creates the outlined function for a CapturedStmt.
llvm::Function *
CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) {
assert(CapturedStmtInfo &&
"CapturedStmtInfo should be set when generating the captured function");
const CapturedDecl *CD = S.getCapturedDecl();
const RecordDecl *RD = S.getCapturedRecordDecl();
SourceLocation Loc = S.getBeginLoc();
assert(CD->hasBody() && "missing CapturedDecl body");
// Build the argument list.
ASTContext &Ctx = CGM.getContext();
FunctionArgList Args;
Args.append(CD->param_begin(), CD->param_end());
// Create the function declaration.
const CGFunctionInfo &FuncInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
llvm::Function *F =
llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
CapturedStmtInfo->getHelperName(), &CGM.getModule());
CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
if (CD->isNothrow())
F->addFnAttr(llvm::Attribute::NoUnwind);
// Generate the function.
StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
CD->getBody()->getBeginLoc());
// Set the context parameter in CapturedStmtInfo.
Address DeclPtr = GetAddrOfLocalVar(CD->getContextParam());
CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr));
// Initialize variable-length arrays.
LValue Base = MakeNaturalAlignRawAddrLValue(
CapturedStmtInfo->getContextValue(), Ctx.getTagDeclType(RD));
for (auto *FD : RD->fields()) {
if (FD->hasCapturedVLAType()) {
auto *ExprArg =
EmitLoadOfLValue(EmitLValueForField(Base, FD), S.getBeginLoc())
.getScalarVal();
auto VAT = FD->getCapturedVLAType();
VLASizeMap[VAT->getSizeExpr()] = ExprArg;
}
}
// If 'this' is captured, load it into CXXThisValue.
if (CapturedStmtInfo->isCXXThisExprCaptured()) {
FieldDecl *FD = CapturedStmtInfo->getThisFieldDecl();
LValue ThisLValue = EmitLValueForField(Base, FD);
CXXThisValue = EmitLoadOfLValue(ThisLValue, Loc).getScalarVal();
}
PGO.assignRegionCounters(GlobalDecl(CD), F);
CapturedStmtInfo->EmitBody(*this, CD->getBody());
FinishFunction(CD->getBodyRBrace());
return F;
}
namespace {
// Returns the first convergence entry/loop/anchor instruction found in |BB|,
// or nullptr otherwise.
llvm::IntrinsicInst *getConvergenceToken(llvm::BasicBlock *BB) {
for (auto &I : *BB) {
auto *II = dyn_cast<llvm::IntrinsicInst>(&I);
if (II && llvm::isConvergenceControlIntrinsic(II->getIntrinsicID()))
return II;
}
return nullptr;
}
} // namespace
llvm::CallBase *
CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input,
llvm::Value *ParentToken) {
llvm::Value *bundleArgs[] = {ParentToken};
llvm::OperandBundleDef OB("convergencectrl", bundleArgs);
auto Output = llvm::CallBase::addOperandBundle(
Input, llvm::LLVMContext::OB_convergencectrl, OB, Input);
Input->replaceAllUsesWith(Output);
Input->eraseFromParent();
return Output;
}
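// For illustration (hypothetical IR): given a parent token %t, the helper
// above rewrites
//   %r = call i32 @foo()
// into
//   %r = call i32 @foo() [ "convergencectrl"(token %t) ]
// and erases the original call.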
llvm::IntrinsicInst *
CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB,
llvm::Value *ParentToken) {
CGBuilderTy::InsertPoint IP = Builder.saveIP();
if (BB->empty())
Builder.SetInsertPoint(BB);
else
Builder.SetInsertPoint(BB->getFirstInsertionPt());
llvm::CallBase *CB = Builder.CreateIntrinsic(
llvm::Intrinsic::experimental_convergence_loop, {}, {});
Builder.restoreIP(IP);
llvm::CallBase *I = addConvergenceControlToken(CB, ParentToken);
return cast<llvm::IntrinsicInst>(I);
}
llvm::IntrinsicInst *
CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) {
llvm::BasicBlock *BB = &F->getEntryBlock();
llvm::IntrinsicInst *Token = getConvergenceToken(BB);
if (Token)
return Token;
// Adding a convergence token requires the function to be marked as
// convergent.
F->setConvergent();
CGBuilderTy::InsertPoint IP = Builder.saveIP();
Builder.SetInsertPoint(&BB->front());
llvm::CallBase *I = Builder.CreateIntrinsic(
llvm::Intrinsic::experimental_convergence_entry, {}, {});
assert(isa<llvm::IntrinsicInst>(I));
Builder.restoreIP(IP);
return cast<llvm::IntrinsicInst>(I);
}
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h
index ba7b565d9755..60e6841e1b3d 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h
+++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenFunction.h
@@ -1,5377 +1,5378 @@
//===-- CodeGenFunction.h - Per-Function state for LLVM CodeGen -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is the internal per-function state used for llvm translation.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_CODEGEN_CODEGENFUNCTION_H
#define LLVM_CLANG_LIB_CODEGEN_CODEGENFUNCTION_H
#include "CGBuilder.h"
#include "CGDebugInfo.h"
#include "CGLoopInfo.h"
#include "CGValue.h"
#include "CodeGenModule.h"
#include "CodeGenPGO.h"
#include "EHScopeStack.h"
#include "VarBypassDetector.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/CurrentSourceLocExprScope.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ExprOpenMP.h"
#include "clang/AST/StmtOpenACC.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/Type.h"
#include "clang/Basic/ABI.h"
#include "clang/Basic/CapturedStmt.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"
#include <optional>
namespace llvm {
class BasicBlock;
class LLVMContext;
class MDNode;
class SwitchInst;
class Twine;
class Value;
class CanonicalLoopInfo;
}
namespace clang {
class ASTContext;
class CXXDestructorDecl;
class CXXForRangeStmt;
class CXXTryStmt;
class Decl;
class LabelDecl;
class FunctionDecl;
class FunctionProtoType;
class LabelStmt;
class ObjCContainerDecl;
class ObjCInterfaceDecl;
class ObjCIvarDecl;
class ObjCMethodDecl;
class ObjCImplementationDecl;
class ObjCPropertyImplDecl;
class TargetInfo;
class VarDecl;
class ObjCForCollectionStmt;
class ObjCAtTryStmt;
class ObjCAtThrowStmt;
class ObjCAtSynchronizedStmt;
class ObjCAutoreleasePoolStmt;
class OMPUseDevicePtrClause;
class OMPUseDeviceAddrClause;
class SVETypeFlags;
class OMPExecutableDirective;
namespace analyze_os_log {
class OSLogBufferLayout;
}
namespace CodeGen {
class CodeGenTypes;
class CGCallee;
class CGFunctionInfo;
class CGBlockInfo;
class CGCXXABI;
class BlockByrefHelpers;
class BlockByrefInfo;
class BlockFieldFlags;
class RegionCodeGenTy;
class TargetCodeGenInfo;
struct OMPTaskDataTy;
struct CGCoroData;
/// The kind of evaluation to perform on values of a particular
/// type. Basically, is the code in CGExprScalar, CGExprComplex, or
/// CGExprAgg?
///
/// TODO: should vectors maybe be split out into their own thing?
enum TypeEvaluationKind {
TEK_Scalar,
TEK_Complex,
TEK_Aggregate
};
#define LIST_SANITIZER_CHECKS \
SANITIZER_CHECK(AddOverflow, add_overflow, 0) \
SANITIZER_CHECK(BuiltinUnreachable, builtin_unreachable, 0) \
SANITIZER_CHECK(CFICheckFail, cfi_check_fail, 0) \
SANITIZER_CHECK(DivremOverflow, divrem_overflow, 0) \
SANITIZER_CHECK(DynamicTypeCacheMiss, dynamic_type_cache_miss, 0) \
SANITIZER_CHECK(FloatCastOverflow, float_cast_overflow, 0) \
SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 0) \
SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0) \
SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0) \
SANITIZER_CHECK(InvalidObjCCast, invalid_objc_cast, 0) \
SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0) \
SANITIZER_CHECK(MissingReturn, missing_return, 0) \
SANITIZER_CHECK(MulOverflow, mul_overflow, 0) \
SANITIZER_CHECK(NegateOverflow, negate_overflow, 0) \
SANITIZER_CHECK(NullabilityArg, nullability_arg, 0) \
SANITIZER_CHECK(NullabilityReturn, nullability_return, 1) \
SANITIZER_CHECK(NonnullArg, nonnull_arg, 0) \
SANITIZER_CHECK(NonnullReturn, nonnull_return, 1) \
SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0) \
SANITIZER_CHECK(PointerOverflow, pointer_overflow, 0) \
SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \
SANITIZER_CHECK(SubOverflow, sub_overflow, 0) \
SANITIZER_CHECK(TypeMismatch, type_mismatch, 1) \
SANITIZER_CHECK(AlignmentAssumption, alignment_assumption, 0) \
SANITIZER_CHECK(VLABoundNotPositive, vla_bound_not_positive, 0) \
SANITIZER_CHECK(BoundsSafety, bounds_safety, 0)
enum SanitizerHandler {
#define SANITIZER_CHECK(Enum, Name, Version) Enum,
LIST_SANITIZER_CHECKS
#undef SANITIZER_CHECK
};
/// Helper class with most of the code for saving a value for a
/// conditional expression cleanup.
struct DominatingLLVMValue {
typedef llvm::PointerIntPair<llvm::Value*, 1, bool> saved_type;
/// Answer whether the given value needs extra work to be saved.
static bool needsSaving(llvm::Value *value) {
if (!value)
return false;
// If it's not an instruction, we don't need to save.
if (!isa<llvm::Instruction>(value)) return false;
// If it's an instruction in the entry block, we don't need to save.
llvm::BasicBlock *block = cast<llvm::Instruction>(value)->getParent();
return (block != &block->getParent()->getEntryBlock());
}
static saved_type save(CodeGenFunction &CGF, llvm::Value *value);
static llvm::Value *restore(CodeGenFunction &CGF, saved_type value);
};
/// A partial specialization of DominatingValue for llvm::Values that
/// might be llvm::Instructions.
template <class T> struct DominatingPointer<T,true> : DominatingLLVMValue {
typedef T *type;
static type restore(CodeGenFunction &CGF, saved_type value) {
return static_cast<T*>(DominatingLLVMValue::restore(CGF, value));
}
};
/// A specialization of DominatingValue for Address.
template <> struct DominatingValue<Address> {
typedef Address type;
struct saved_type {
DominatingLLVMValue::saved_type BasePtr;
llvm::Type *ElementType;
CharUnits Alignment;
DominatingLLVMValue::saved_type Offset;
llvm::PointerType *EffectiveType;
};
static bool needsSaving(type value) {
return DominatingLLVMValue::needsSaving(value.getBasePointer()) ||
DominatingLLVMValue::needsSaving(value.getOffset());
}
static saved_type save(CodeGenFunction &CGF, type value) {
return {DominatingLLVMValue::save(CGF, value.getBasePointer()),
value.getElementType(), value.getAlignment(),
DominatingLLVMValue::save(CGF, value.getOffset()), value.getType()};
}
static type restore(CodeGenFunction &CGF, saved_type value) {
return Address(DominatingLLVMValue::restore(CGF, value.BasePtr),
value.ElementType, value.Alignment, CGPointerAuthInfo(),
DominatingLLVMValue::restore(CGF, value.Offset));
}
};
/// A specialization of DominatingValue for RValue.
template <> struct DominatingValue<RValue> {
typedef RValue type;
class saved_type {
enum Kind { ScalarLiteral, ScalarAddress, AggregateLiteral,
AggregateAddress, ComplexAddress };
union {
struct {
DominatingLLVMValue::saved_type first, second;
} Vals;
DominatingValue<Address>::saved_type AggregateAddr;
};
LLVM_PREFERRED_TYPE(Kind)
unsigned K : 3;
saved_type(DominatingLLVMValue::saved_type Val1, unsigned K)
: Vals{Val1, DominatingLLVMValue::saved_type()}, K(K) {}
saved_type(DominatingLLVMValue::saved_type Val1,
DominatingLLVMValue::saved_type Val2)
: Vals{Val1, Val2}, K(ComplexAddress) {}
saved_type(DominatingValue<Address>::saved_type AggregateAddr, unsigned K)
: AggregateAddr(AggregateAddr), K(K) {}
public:
static bool needsSaving(RValue value);
static saved_type save(CodeGenFunction &CGF, RValue value);
RValue restore(CodeGenFunction &CGF);
// implementations in CGCleanup.cpp
};
static bool needsSaving(type value) {
return saved_type::needsSaving(value);
}
static saved_type save(CodeGenFunction &CGF, type value) {
return saved_type::save(CGF, value);
}
static type restore(CodeGenFunction &CGF, saved_type value) {
return value.restore(CGF);
}
};
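// Illustrative pattern (sketch only): saving a value before emitting
// conditionally-executed code and restoring it afterwards; 'RV' is a
// hypothetical RValue.
//
//   DominatingValue<RValue>::saved_type Saved =
//       DominatingValue<RValue>::save(CGF, RV);
//   // ... emit conditionally-executed code ...
//   RValue Restored = DominatingValue<RValue>::restore(CGF, Saved);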
/// CodeGenFunction - This class organizes the per-function state that is used
/// while generating LLVM code.
class CodeGenFunction : public CodeGenTypeCache {
CodeGenFunction(const CodeGenFunction &) = delete;
void operator=(const CodeGenFunction &) = delete;
friend class CGCXXABI;
public:
/// A jump destination is an abstract label, branching to which may
/// require a jump out through normal cleanups.
struct JumpDest {
JumpDest() : Block(nullptr), Index(0) {}
JumpDest(llvm::BasicBlock *Block, EHScopeStack::stable_iterator Depth,
unsigned Index)
: Block(Block), ScopeDepth(Depth), Index(Index) {}
bool isValid() const { return Block != nullptr; }
llvm::BasicBlock *getBlock() const { return Block; }
EHScopeStack::stable_iterator getScopeDepth() const { return ScopeDepth; }
unsigned getDestIndex() const { return Index; }
// This should be used cautiously.
void setScopeDepth(EHScopeStack::stable_iterator depth) {
ScopeDepth = depth;
}
private:
llvm::BasicBlock *Block;
EHScopeStack::stable_iterator ScopeDepth;
unsigned Index;
};
CodeGenModule &CGM; // Per-module state.
const TargetInfo &Target;
// For EH/SEH outlined funclets, this field points to the parent's CGF.
CodeGenFunction *ParentCGF = nullptr;
typedef std::pair<llvm::Value *, llvm::Value *> ComplexPairTy;
LoopInfoStack LoopStack;
CGBuilderTy Builder;
// Stores variables for which we can't generate correct lifetime markers
// because of jumps.
VarBypassDetector Bypasses;
/// List of recently emitted OMPCanonicalLoops.
///
/// Since OMPCanonicalLoops are nested inside other statements (in particular
/// CapturedStmt generated by OMPExecutableDirective and non-perfectly nested
/// loops), we cannot directly call OMPEmitOMPCanonicalLoop and receive its
/// llvm::CanonicalLoopInfo. Instead, we call EmitStmt and any
/// OMPEmitOMPCanonicalLoop called by it will add its CanonicalLoopInfo to
/// this stack when done. Entering a new loop requires clearing this list: it
/// either means we start parsing a new loop nest (in which case the previous
/// loop nest goes out of scope) or a second loop at the same level, in which
/// case it would be ambiguous which of the two (or more) loops the loop nest
/// would extend into.
SmallVector<llvm::CanonicalLoopInfo *, 4> OMPLoopNestStack;
/// Stack to track the Logical Operator recursion nest for MC/DC.
SmallVector<const BinaryOperator *, 16> MCDCLogOpStack;
/// Stack to track the controlled convergence tokens.
SmallVector<llvm::IntrinsicInst *, 4> ConvergenceTokenStack;
/// Number of nested loops to be consumed by the last surrounding
/// loop-associated directive.
int ExpectedOMPLoopDepth = 0;
// CodeGen lambda for loops and support for ordered clause
typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &,
JumpDest)>
CodeGenLoopTy;
typedef llvm::function_ref<void(CodeGenFunction &, SourceLocation,
const unsigned, const bool)>
CodeGenOrderedTy;
// Codegen lambda for loop bounds in worksharing loop constructs
typedef llvm::function_ref<std::pair<LValue, LValue>(
CodeGenFunction &, const OMPExecutableDirective &S)>
CodeGenLoopBoundsTy;
// Codegen lambda for loop bounds in dispatch-based loop implementation
typedef llvm::function_ref<std::pair<llvm::Value *, llvm::Value *>(
CodeGenFunction &, const OMPExecutableDirective &S, Address LB,
Address UB)>
CodeGenDispatchBoundsTy;
/// CGBuilder insert helper. This function is called after an
/// instruction is created using Builder.
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
llvm::BasicBlock::iterator InsertPt) const;
/// CurFuncDecl - Holds the Decl for the current outermost
/// non-closure context.
const Decl *CurFuncDecl = nullptr;
/// CurCodeDecl - This is the inner-most code context, which includes blocks.
const Decl *CurCodeDecl = nullptr;
const CGFunctionInfo *CurFnInfo = nullptr;
QualType FnRetTy;
llvm::Function *CurFn = nullptr;
/// Saved parameter Decls for coroutines.
llvm::SmallVector<const ParmVarDecl *, 4> FnArgs;
// Holds coroutine data if the current function is a coroutine. We use a
// wrapper to manage its lifetime, so that we don't have to define CGCoroData
// in this header.
struct CGCoroInfo {
std::unique_ptr<CGCoroData> Data;
bool InSuspendBlock = false;
CGCoroInfo();
~CGCoroInfo();
};
CGCoroInfo CurCoro;
bool isCoroutine() const {
return CurCoro.Data != nullptr;
}
bool inSuspendBlock() const {
return isCoroutine() && CurCoro.InSuspendBlock;
}
// Holds FramePtr for await_suspend wrapper generation,
// so that the __builtin_coro_frame call can be lowered
// directly to the value of its second argument
struct AwaitSuspendWrapperInfo {
llvm::Value *FramePtr = nullptr;
};
AwaitSuspendWrapperInfo CurAwaitSuspendWrapper;
// Generates a wrapper function for the `llvm.coro.await.suspend.*`
// intrinsics. It encapsulates the SuspendExpr in a function, to separate
// its body from the main coroutine and avoid miscompilations. The intrinsic
// is lowered to a call of this function in the CoroSplit pass.
// The function signature is:
// <type> __await_suspend_wrapper_<name>(ptr %awaiter, ptr %hdl)
// where <type> is one of (void, i1, ptr)
llvm::Function *generateAwaitSuspendWrapper(Twine const &CoroName,
Twine const &SuspendPointName,
CoroutineSuspendExpr const &S);
/// CurGD - The GlobalDecl for the current function being compiled.
GlobalDecl CurGD;
/// PrologueCleanupDepth - The cleanup depth enclosing all the
/// cleanups associated with the parameters.
EHScopeStack::stable_iterator PrologueCleanupDepth;
/// ReturnBlock - Unified return block.
JumpDest ReturnBlock;
/// ReturnValue - The temporary alloca to hold the return
/// value. This is invalid iff the function has no return value.
Address ReturnValue = Address::invalid();
/// ReturnValuePointer - The temporary alloca to hold a pointer to sret.
/// This is invalid if sret is not in use.
Address ReturnValuePointer = Address::invalid();
/// If a return statement is being visited, this holds the return statement's
/// result expression.
const Expr *RetExpr = nullptr;
/// Return true if a label was seen in the current scope.
bool hasLabelBeenSeenInCurrentScope() const {
if (CurLexicalScope)
return CurLexicalScope->hasLabels();
return !LabelMap.empty();
}
/// AllocaInsertPoint - This is an instruction in the entry block before which
/// we prefer to insert allocas.
llvm::AssertingVH<llvm::Instruction> AllocaInsertPt;
private:
/// PostAllocaInsertPt - This is a place in the prologue where code can be
/// inserted that will be dominated by all the static allocas. This helps
/// achieve two things:
/// 1. Contiguity of all static allocas (within the prologue) is maintained.
/// 2. All other prologue code (which is dominated by the static allocas)
/// appears in source order immediately after all the static allocas.
///
/// PostAllocaInsertPt will be lazily created when it is *really* required.
llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
public:
/// Return PostAllocaInsertPt. If it is not yet created, then insert it
/// immediately after AllocaInsertPt.
llvm::Instruction *getPostAllocaInsertPoint() {
if (!PostAllocaInsertPt) {
assert(AllocaInsertPt &&
"Expected static alloca insertion point at function prologue");
assert(AllocaInsertPt->getParent()->isEntryBlock() &&
"EBB should be entry block of the current code gen function");
PostAllocaInsertPt = AllocaInsertPt->clone();
PostAllocaInsertPt->setName("postallocapt");
PostAllocaInsertPt->insertAfter(AllocaInsertPt);
}
return PostAllocaInsertPt;
}
/// API for captured statement code generation.
class CGCapturedStmtInfo {
public:
explicit CGCapturedStmtInfo(CapturedRegionKind K = CR_Default)
: Kind(K), ThisValue(nullptr), CXXThisFieldDecl(nullptr) {}
explicit CGCapturedStmtInfo(const CapturedStmt &S,
CapturedRegionKind K = CR_Default)
: Kind(K), ThisValue(nullptr), CXXThisFieldDecl(nullptr) {
RecordDecl::field_iterator Field =
S.getCapturedRecordDecl()->field_begin();
for (CapturedStmt::const_capture_iterator I = S.capture_begin(),
E = S.capture_end();
I != E; ++I, ++Field) {
if (I->capturesThis())
CXXThisFieldDecl = *Field;
else if (I->capturesVariable())
CaptureFields[I->getCapturedVar()->getCanonicalDecl()] = *Field;
else if (I->capturesVariableByCopy())
CaptureFields[I->getCapturedVar()->getCanonicalDecl()] = *Field;
}
}
virtual ~CGCapturedStmtInfo();
CapturedRegionKind getKind() const { return Kind; }
virtual void setContextValue(llvm::Value *V) { ThisValue = V; }
/// Retrieve the value of the context parameter.
virtual llvm::Value *getContextValue() const { return ThisValue; }
/// Lookup the captured field decl for a variable.
virtual const FieldDecl *lookup(const VarDecl *VD) const {
return CaptureFields.lookup(VD->getCanonicalDecl());
}
bool isCXXThisExprCaptured() const { return getThisFieldDecl() != nullptr; }
virtual FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; }
static bool classof(const CGCapturedStmtInfo *) {
return true;
}
/// Emit the captured statement body.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) {
CGF.incrementProfileCounter(S);
CGF.EmitStmt(S);
}
/// Get the name of the capture helper.
virtual StringRef getHelperName() const { return "__captured_stmt"; }
/// Get the CaptureFields
llvm::SmallDenseMap<const VarDecl *, FieldDecl *> getCaptureFields() {
return CaptureFields;
}
private:
/// The kind of captured statement being generated.
CapturedRegionKind Kind;
/// Keep the map between VarDecl and FieldDecl.
llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields;
/// The base address of the captured record, passed in as the first
/// argument of the parallel region function.
llvm::Value *ThisValue;
/// Captured 'this' type.
FieldDecl *CXXThisFieldDecl;
};
CGCapturedStmtInfo *CapturedStmtInfo = nullptr;
/// RAII for correct setting/restoring of CapturedStmtInfo.
class CGCapturedStmtRAII {
private:
CodeGenFunction &CGF;
CGCapturedStmtInfo *PrevCapturedStmtInfo;
public:
CGCapturedStmtRAII(CodeGenFunction &CGF,
CGCapturedStmtInfo *NewCapturedStmtInfo)
: CGF(CGF), PrevCapturedStmtInfo(CGF.CapturedStmtInfo) {
CGF.CapturedStmtInfo = NewCapturedStmtInfo;
}
~CGCapturedStmtRAII() { CGF.CapturedStmtInfo = PrevCapturedStmtInfo; }
};
/// An abstract representation of regular/ObjC call/message targets.
class AbstractCallee {
/// The function declaration of the callee.
const Decl *CalleeDecl;
public:
AbstractCallee() : CalleeDecl(nullptr) {}
AbstractCallee(const FunctionDecl *FD) : CalleeDecl(FD) {}
AbstractCallee(const ObjCMethodDecl *OMD) : CalleeDecl(OMD) {}
bool hasFunctionDecl() const {
return isa_and_nonnull<FunctionDecl>(CalleeDecl);
}
const Decl *getDecl() const { return CalleeDecl; }
unsigned getNumParams() const {
if (const auto *FD = dyn_cast<FunctionDecl>(CalleeDecl))
return FD->getNumParams();
return cast<ObjCMethodDecl>(CalleeDecl)->param_size();
}
const ParmVarDecl *getParamDecl(unsigned I) const {
if (const auto *FD = dyn_cast<FunctionDecl>(CalleeDecl))
return FD->getParamDecl(I);
return *(cast<ObjCMethodDecl>(CalleeDecl)->param_begin() + I);
}
};
/// Sanitizers enabled for this function.
SanitizerSet SanOpts;
/// True if CodeGen currently emits code implementing sanitizer checks.
bool IsSanitizerScope = false;
/// RAII object to set/unset CodeGenFunction::IsSanitizerScope.
class SanitizerScope {
CodeGenFunction *CGF;
public:
SanitizerScope(CodeGenFunction *CGF);
~SanitizerScope();
};
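// Illustrative use (sketch only): mark the IR emitted for a sanitizer
// check as check code:
//
//   {
//     SanitizerScope SanScope(&CGF);
//     // ... emit the check and the handler call ...
//   } // IsSanitizerScope is cleared when SanScope is destroyed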
/// In C++, whether we are code generating a thunk. This controls whether we
/// should emit cleanups.
bool CurFuncIsThunk = false;
/// In ARC, whether we should autorelease the return value.
bool AutoreleaseResult = false;
/// Whether we processed a Microsoft-style asm block during CodeGen. These can
/// potentially set the return value.
bool SawAsmBlock = false;
GlobalDecl CurSEHParent;
/// True if the current function is an outlined SEH helper. This can be a
/// finally block or filter expression.
bool IsOutlinedSEHHelper = false;
/// True if CodeGen currently emits code inside a preserved access index
/// region.
bool IsInPreservedAIRegion = false;
/// True if the current statement has nomerge attribute.
bool InNoMergeAttributedStmt = false;
/// True if the current statement has noinline attribute.
bool InNoInlineAttributedStmt = false;
/// True if the current statement has always_inline attribute.
bool InAlwaysInlineAttributedStmt = false;
// The CallExpr within the current statement that the musttail attribute
// applies to. nullptr if there is no 'musttail' on the current statement.
const CallExpr *MustTailCall = nullptr;
/// Returns true if a function must make progress, which means the
/// mustprogress attribute can be added.
bool checkIfFunctionMustProgress() {
if (CGM.getCodeGenOpts().getFiniteLoops() ==
CodeGenOptions::FiniteLoopsKind::Never)
return false;
// C++11 and later guarantee that a thread will eventually do one of the
// following (C++11 [intro.multithread]p24 and C++17 [intro.progress]p1):
// - terminate,
// - make a call to a library I/O function,
// - perform an access through a volatile glvalue, or
// - perform a synchronization operation or an atomic operation.
//
// Hence each function is 'mustprogress' in C++11 or later.
return getLangOpts().CPlusPlus11;
}
/// Returns true if a loop must make progress, which means the mustprogress
/// attribute can be added. \p HasEmptyBody indicates whether the loop body
/// is known to be empty.
bool checkIfLoopMustProgress(const Expr *, bool HasEmptyBody);
const CodeGen::CGBlockInfo *BlockInfo = nullptr;
llvm::Value *BlockPointer = nullptr;
llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField = nullptr;
/// A mapping from NRVO variables to the flags used to indicate
/// when the NRVO has been applied to this variable.
llvm::DenseMap<const VarDecl *, llvm::Value *> NRVOFlags;
EHScopeStack EHStack;
llvm::SmallVector<char, 256> LifetimeExtendedCleanupStack;
// A stack of cleanups which were added to EHStack but have to be deactivated
// later before being popped or emitted. These are usually deactivated on
// exiting a `CleanupDeactivationScope` scope. For instance, after a
// full-expr.
//
// These are especially useful for correctly emitting cleanups while
// encountering branches out of an expression (through stmt-exprs or
// coroutine suspensions).
struct DeferredDeactivateCleanup {
EHScopeStack::stable_iterator Cleanup;
llvm::Instruction *DominatingIP;
};
llvm::SmallVector<DeferredDeactivateCleanup> DeferredDeactivationCleanupStack;
// Enters a new scope for capturing cleanups which are deferred to be
// deactivated, all of which will be deactivated once the scope is exited.
struct CleanupDeactivationScope {
CodeGenFunction &CGF;
size_t OldDeactivateCleanupStackSize;
bool Deactivated;
CleanupDeactivationScope(CodeGenFunction &CGF)
: CGF(CGF), OldDeactivateCleanupStackSize(
CGF.DeferredDeactivationCleanupStack.size()),
Deactivated(false) {}
void ForceDeactivate() {
assert(!Deactivated && "Deactivating already deactivated scope");
auto &Stack = CGF.DeferredDeactivationCleanupStack;
for (size_t I = Stack.size(); I > OldDeactivateCleanupStackSize; I--) {
CGF.DeactivateCleanupBlock(Stack[I - 1].Cleanup,
Stack[I - 1].DominatingIP);
Stack[I - 1].DominatingIP->eraseFromParent();
}
Stack.resize(OldDeactivateCleanupStackSize);
Deactivated = true;
}
~CleanupDeactivationScope() {
if (Deactivated)
return;
ForceDeactivate();
}
};
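// Illustrative use (sketch only), deferring deactivation to the end of a
// full-expression; 'MyCleanup' is a hypothetical cleanup class:
//
//   {
//     CleanupDeactivationScope Scope(CGF);
//     CGF.pushCleanupAndDeferDeactivation<MyCleanup>(NormalAndEHCleanup);
//     // ... emit the rest of the full-expression ...
//   } // deferred cleanups are deactivated here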
llvm::SmallVector<const JumpDest *, 2> SEHTryEpilogueStack;
llvm::Instruction *CurrentFuncletPad = nullptr;
class CallLifetimeEnd final : public EHScopeStack::Cleanup {
bool isRedundantBeforeReturn() override { return true; }
llvm::Value *Addr;
llvm::Value *Size;
public:
CallLifetimeEnd(RawAddress addr, llvm::Value *size)
: Addr(addr.getPointer()), Size(size) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
CGF.EmitLifetimeEnd(Size, Addr);
}
};
/// Header for data within LifetimeExtendedCleanupStack.
struct LifetimeExtendedCleanupHeader {
/// The size of the following cleanup object.
unsigned Size;
/// The kind of cleanup to push.
LLVM_PREFERRED_TYPE(CleanupKind)
unsigned Kind : 31;
/// Whether this is a conditional cleanup.
LLVM_PREFERRED_TYPE(bool)
unsigned IsConditional : 1;
size_t getSize() const { return Size; }
CleanupKind getKind() const { return (CleanupKind)Kind; }
bool isConditional() const { return IsConditional; }
};
/// i32s containing the indexes of the cleanup destinations.
RawAddress NormalCleanupDest = RawAddress::invalid();
unsigned NextCleanupDestIndex = 1;
/// EHResumeBlock - Unified block containing a call to llvm.eh.resume.
llvm::BasicBlock *EHResumeBlock = nullptr;
/// The exception slot. All landing pads write the current exception pointer
/// into this alloca.
llvm::Value *ExceptionSlot = nullptr;
/// The selector slot. Under the MandatoryCleanup model, all landing pads
/// write the current selector value into this alloca.
llvm::AllocaInst *EHSelectorSlot = nullptr;
/// A stack of exception code slots. Entering an __except block pushes a slot
/// on the stack and leaving pops one. The __exception_code() intrinsic loads
/// a value from the top of the stack.
SmallVector<Address, 1> SEHCodeSlotStack;
/// Value returned by __exception_info intrinsic.
llvm::Value *SEHInfo = nullptr;
/// Emits a landing pad for the current EH stack.
llvm::BasicBlock *EmitLandingPad();
llvm::BasicBlock *getInvokeDestImpl();
/// Parent loop-based directive for scan directive.
const OMPExecutableDirective *OMPParentLoopDirectiveForScan = nullptr;
llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
llvm::BasicBlock *OMPAfterScanBlock = nullptr;
llvm::BasicBlock *OMPScanExitBlock = nullptr;
llvm::BasicBlock *OMPScanDispatch = nullptr;
bool OMPFirstScanLoop = false;
/// Manages parent directive for scan directives.
class ParentLoopDirectiveForScanRegion {
CodeGenFunction &CGF;
const OMPExecutableDirective *ParentLoopDirectiveForScan;
public:
ParentLoopDirectiveForScanRegion(
CodeGenFunction &CGF,
const OMPExecutableDirective &ParentLoopDirectiveForScan)
: CGF(CGF),
ParentLoopDirectiveForScan(CGF.OMPParentLoopDirectiveForScan) {
CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan;
}
~ParentLoopDirectiveForScanRegion() {
CGF.OMPParentLoopDirectiveForScan = ParentLoopDirectiveForScan;
}
};
template <class T>
typename DominatingValue<T>::saved_type saveValueInCond(T value) {
return DominatingValue<T>::save(*this, value);
}
class CGFPOptionsRAII {
public:
CGFPOptionsRAII(CodeGenFunction &CGF, FPOptions FPFeatures);
CGFPOptionsRAII(CodeGenFunction &CGF, const Expr *E);
~CGFPOptionsRAII();
private:
void ConstructorHelper(FPOptions FPFeatures);
CodeGenFunction &CGF;
FPOptions OldFPFeatures;
llvm::fp::ExceptionBehavior OldExcept;
llvm::RoundingMode OldRounding;
std::optional<CGBuilderTy::FastMathFlagGuard> FMFGuard;
};
FPOptions CurFPFeatures;
public:
/// ObjCEHValueStack - Stack of Objective-C exception values, used for
/// rethrows.
SmallVector<llvm::Value*, 8> ObjCEHValueStack;
/// A class controlling the emission of a finally block.
class FinallyInfo {
/// Where the catchall's edge through the cleanup should go.
JumpDest RethrowDest;
/// A function to call to enter the catch.
llvm::FunctionCallee BeginCatchFn;
/// An i1 variable indicating whether or not the @finally is
/// running for an exception.
llvm::AllocaInst *ForEHVar = nullptr;
/// An i8* variable into which the exception pointer to rethrow
/// has been saved.
llvm::AllocaInst *SavedExnVar = nullptr;
public:
void enter(CodeGenFunction &CGF, const Stmt *Finally,
llvm::FunctionCallee beginCatchFn,
llvm::FunctionCallee endCatchFn, llvm::FunctionCallee rethrowFn);
void exit(CodeGenFunction &CGF);
};
/// Returns true inside SEH __try blocks.
bool isSEHTryScope() const { return !SEHTryEpilogueStack.empty(); }
/// Returns true while emitting a cleanuppad.
bool isCleanupPadScope() const {
return CurrentFuncletPad && isa<llvm::CleanupPadInst>(CurrentFuncletPad);
}
/// pushFullExprCleanup - Push a cleanup to be run at the end of the
/// current full-expression. Safe against the possibility that
/// we're currently inside a conditionally-evaluated expression.
template <class T, class... As>
void pushFullExprCleanup(CleanupKind kind, As... A) {
// If we're not in a conditional branch, or if none of the
// arguments requires saving, then use the unconditional cleanup.
if (!isInConditionalBranch())
return EHStack.pushCleanup<T>(kind, A...);
// Stash values in a tuple so we can guarantee the order of saves.
typedef std::tuple<typename DominatingValue<As>::saved_type...> SavedTuple;
SavedTuple Saved{saveValueInCond(A)...};
typedef EHScopeStack::ConditionalCleanup<T, As...> CleanupType;
EHStack.pushCleanupTuple<CleanupType>(kind, Saved);
initFullExprCleanup();
}
/// Queue a cleanup to be pushed after finishing the current full-expression,
/// potentially with an active flag.
template <class T, class... As>
void pushCleanupAfterFullExpr(CleanupKind Kind, As... A) {
if (!isInConditionalBranch())
return pushCleanupAfterFullExprWithActiveFlag<T>(
Kind, RawAddress::invalid(), A...);
RawAddress ActiveFlag = createCleanupActiveFlag();
assert(!DominatingValue<Address>::needsSaving(ActiveFlag) &&
"cleanup active flag should never need saving");
typedef std::tuple<typename DominatingValue<As>::saved_type...> SavedTuple;
SavedTuple Saved{saveValueInCond(A)...};
typedef EHScopeStack::ConditionalCleanup<T, As...> CleanupType;
pushCleanupAfterFullExprWithActiveFlag<CleanupType>(Kind, ActiveFlag, Saved);
}
template <class T, class... As>
void pushCleanupAfterFullExprWithActiveFlag(CleanupKind Kind,
RawAddress ActiveFlag, As... A) {
LifetimeExtendedCleanupHeader Header = {sizeof(T), Kind,
ActiveFlag.isValid()};
size_t OldSize = LifetimeExtendedCleanupStack.size();
LifetimeExtendedCleanupStack.resize(
LifetimeExtendedCleanupStack.size() + sizeof(Header) + Header.Size +
(Header.IsConditional ? sizeof(ActiveFlag) : 0));
static_assert(sizeof(Header) % alignof(T) == 0,
"Cleanup will be allocated on misaligned address");
char *Buffer = &LifetimeExtendedCleanupStack[OldSize];
new (Buffer) LifetimeExtendedCleanupHeader(Header);
new (Buffer + sizeof(Header)) T(A...);
if (Header.IsConditional)
new (Buffer + sizeof(Header) + sizeof(T)) RawAddress(ActiveFlag);
}
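// The resulting LifetimeExtendedCleanupStack layout for a single entry is,
// schematically:
//
//   [ LifetimeExtendedCleanupHeader | T (the cleanup object) | RawAddress ]
//
// where the trailing RawAddress (the active flag) is present only when
// Header.IsConditional is set.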
// Push a cleanup onto EHStack and deactivate it later. It is usually
// deactivated when exiting a `CleanupDeactivationScope` (for example: after a
// full expression).
template <class T, class... As>
void pushCleanupAndDeferDeactivation(CleanupKind Kind, As... A) {
// Placeholder dominating IP for this cleanup.
llvm::Instruction *DominatingIP =
Builder.CreateFlagLoad(llvm::Constant::getNullValue(Int8PtrTy));
EHStack.pushCleanup<T>(Kind, A...);
DeferredDeactivationCleanupStack.push_back(
{EHStack.stable_begin(), DominatingIP});
}
/// Set up the last cleanup that was pushed as a conditional
/// full-expression cleanup.
void initFullExprCleanup() {
initFullExprCleanupWithFlag(createCleanupActiveFlag());
}
void initFullExprCleanupWithFlag(RawAddress ActiveFlag);
RawAddress createCleanupActiveFlag();
/// PushDestructorCleanup - Push a cleanup to call the
/// complete-object destructor of an object of the given type at the
/// given address. Does nothing if T is not a C++ class type with a
/// non-trivial destructor.
void PushDestructorCleanup(QualType T, Address Addr);
/// PushDestructorCleanup - Push a cleanup to call the
/// complete-object variant of the given destructor on the object at
/// the given address.
void PushDestructorCleanup(const CXXDestructorDecl *Dtor, QualType T,
Address Addr);
/// PopCleanupBlock - Will pop the cleanup entry on the stack and
/// process all branch fixups.
void PopCleanupBlock(bool FallThroughIsBranchThrough = false,
bool ForDeactivation = false);
/// DeactivateCleanupBlock - Deactivates the given cleanup block.
/// The block cannot be reactivated. Pops it if it's the top of the
/// stack.
///
/// \param DominatingIP - An instruction which is known to
/// dominate the current IP (if set) and which lies along
/// all paths of execution between the current IP and
/// the point at which the cleanup comes into scope.
void DeactivateCleanupBlock(EHScopeStack::stable_iterator Cleanup,
llvm::Instruction *DominatingIP);
/// ActivateCleanupBlock - Activates an initially-inactive cleanup.
/// Cannot be used to resurrect a deactivated cleanup.
///
/// \param DominatingIP - An instruction which is known to
/// dominate the current IP (if set) and which lies along
/// all paths of execution between the current IP and
/// the point at which the cleanup comes into scope.
void ActivateCleanupBlock(EHScopeStack::stable_iterator Cleanup,
llvm::Instruction *DominatingIP);
/// Enters a new scope for capturing cleanups, all of which
/// will be executed once the scope is exited.
class RunCleanupsScope {
EHScopeStack::stable_iterator CleanupStackDepth, OldCleanupScopeDepth;
size_t LifetimeExtendedCleanupStackSize;
CleanupDeactivationScope DeactivateCleanups;
bool OldDidCallStackSave;
protected:
bool PerformCleanup;
private:
RunCleanupsScope(const RunCleanupsScope &) = delete;
void operator=(const RunCleanupsScope &) = delete;
protected:
CodeGenFunction& CGF;
public:
/// Enter a new cleanup scope.
explicit RunCleanupsScope(CodeGenFunction &CGF)
: DeactivateCleanups(CGF), PerformCleanup(true), CGF(CGF) {
CleanupStackDepth = CGF.EHStack.stable_begin();
LifetimeExtendedCleanupStackSize =
CGF.LifetimeExtendedCleanupStack.size();
OldDidCallStackSave = CGF.DidCallStackSave;
CGF.DidCallStackSave = false;
OldCleanupScopeDepth = CGF.CurrentCleanupScopeDepth;
CGF.CurrentCleanupScopeDepth = CleanupStackDepth;
}
/// Exit this cleanup scope, emitting any accumulated cleanups.
~RunCleanupsScope() {
if (PerformCleanup)
ForceCleanup();
}
/// Determine whether this scope requires any cleanups.
bool requiresCleanups() const {
return CGF.EHStack.stable_begin() != CleanupStackDepth;
}
/// Force the emission of cleanups now, instead of waiting
/// until this object is destroyed.
/// \param ValuesToReload - A list of values that need to be available at
/// the insertion point after cleanup emission. If cleanup emission created
/// a shared cleanup block, these value pointers will be rewritten.
/// Otherwise, they will not be modified.
void ForceCleanup(std::initializer_list<llvm::Value**> ValuesToReload = {}) {
assert(PerformCleanup && "Already forced cleanup");
CGF.DidCallStackSave = OldDidCallStackSave;
DeactivateCleanups.ForceDeactivate();
CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize,
ValuesToReload);
PerformCleanup = false;
CGF.CurrentCleanupScopeDepth = OldCleanupScopeDepth;
}
};
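// A minimal usage sketch (illustrative only):
//
//   {
//     CodeGenFunction::RunCleanupsScope Scope(CGF);
//     // ... emit code that may push cleanups onto CGF.EHStack ...
//   } // accumulated cleanups are emitted when Scope is destroyed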
// Cleanup stack depth of the RunCleanupsScope that was pushed most recently.
EHScopeStack::stable_iterator CurrentCleanupScopeDepth =
EHScopeStack::stable_end();
class LexicalScope : public RunCleanupsScope {
SourceRange Range;
SmallVector<const LabelDecl*, 4> Labels;
LexicalScope *ParentScope;
LexicalScope(const LexicalScope &) = delete;
void operator=(const LexicalScope &) = delete;
public:
/// Enter a new cleanup scope.
explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range)
: RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) {
CGF.CurLexicalScope = this;
if (CGDebugInfo *DI = CGF.getDebugInfo())
DI->EmitLexicalBlockStart(CGF.Builder, Range.getBegin());
}
void addLabel(const LabelDecl *label) {
assert(PerformCleanup && "adding label to dead scope?");
Labels.push_back(label);
}
/// Exit this cleanup scope, emitting any accumulated
/// cleanups.
~LexicalScope() {
if (CGDebugInfo *DI = CGF.getDebugInfo())
DI->EmitLexicalBlockEnd(CGF.Builder, Range.getEnd());
// If we should perform cleanups, force them now. Note that
// this ends the cleanup scope before rescoping any labels.
if (PerformCleanup) {
ApplyDebugLocation DL(CGF, Range.getEnd());
ForceCleanup();
}
}
/// Force the emission of cleanups now, instead of waiting
/// until this object is destroyed.
void ForceCleanup() {
CGF.CurLexicalScope = ParentScope;
RunCleanupsScope::ForceCleanup();
if (!Labels.empty())
rescopeLabels();
}
bool hasLabels() const {
return !Labels.empty();
}
void rescopeLabels();
};
typedef llvm::DenseMap<const Decl *, Address> DeclMapTy;
/// A class used to temporarily assign new addresses to some variables.
class OMPMapVars {
DeclMapTy SavedLocals;
DeclMapTy SavedTempAddresses;
OMPMapVars(const OMPMapVars &) = delete;
void operator=(const OMPMapVars &) = delete;
public:
explicit OMPMapVars() = default;
~OMPMapVars() {
assert(SavedLocals.empty() && "Did not restore original addresses.");
}
/// Sets the address of the variable \p LocalVD to be \p TempAddr in
/// function \p CGF.
/// \return true if the variable was registered, false if it had already
/// been registered before (only the first address is saved).
bool setVarAddr(CodeGenFunction &CGF, const VarDecl *LocalVD,
Address TempAddr) {
LocalVD = LocalVD->getCanonicalDecl();
// Only save it once.
if (SavedLocals.count(LocalVD)) return false;
// Copy the existing local entry to SavedLocals.
auto it = CGF.LocalDeclMap.find(LocalVD);
if (it != CGF.LocalDeclMap.end())
SavedLocals.try_emplace(LocalVD, it->second);
else
SavedLocals.try_emplace(LocalVD, Address::invalid());
// Generate the private entry.
QualType VarTy = LocalVD->getType();
if (VarTy->isReferenceType()) {
Address Temp = CGF.CreateMemTemp(VarTy);
CGF.Builder.CreateStore(TempAddr.emitRawPointer(CGF), Temp);
TempAddr = Temp;
}
SavedTempAddresses.try_emplace(LocalVD, TempAddr);
return true;
}
/// Applies the new addresses to the list of variables.
/// \return true if at least one variable is using a new address, false
/// otherwise.
bool apply(CodeGenFunction &CGF) {
copyInto(SavedTempAddresses, CGF.LocalDeclMap);
SavedTempAddresses.clear();
return !SavedLocals.empty();
}
/// Restores original addresses of the variables.
void restore(CodeGenFunction &CGF) {
if (!SavedLocals.empty()) {
copyInto(SavedLocals, CGF.LocalDeclMap);
SavedLocals.clear();
}
}
private:
/// Copy all the entries in the source map over the corresponding
/// entries in the destination, which must exist.
static void copyInto(const DeclMapTy &Src, DeclMapTy &Dest) {
for (auto &Pair : Src) {
if (!Pair.second.isValid()) {
Dest.erase(Pair.first);
continue;
}
auto I = Dest.find(Pair.first);
if (I != Dest.end())
I->second = Pair.second;
else
Dest.insert(Pair);
}
}
};
/// The scope used to remap some variables as private in the OpenMP loop body
/// (or other captured region emitted without outlining), and to restore old
/// vars back on exit.
class OMPPrivateScope : public RunCleanupsScope {
OMPMapVars MappedVars;
OMPPrivateScope(const OMPPrivateScope &) = delete;
void operator=(const OMPPrivateScope &) = delete;
public:
/// Enter a new OpenMP private scope.
explicit OMPPrivateScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) {}
/// Registers the variable \p LocalVD as private, with \p Addr as the
/// address of the corresponding private copy.
/// \return true if the variable is registered as private, false if it has
/// been privatized already.
bool addPrivate(const VarDecl *LocalVD, Address Addr) {
assert(PerformCleanup && "adding private to dead scope");
return MappedVars.setVarAddr(CGF, LocalVD, Addr);
}
/// Privatizes the local variables previously registered as private.
/// Registration is separate from the actual privatization to allow
/// initializers to use the values of the original variables, not the
/// private copies. This is important, for example, if the private variable
/// is a class variable initialized by a constructor that references other
/// private variables: at initialization, the original variables must be
/// used, not the private copies. (See the usage sketch after this class.)
/// \return true if at least one variable was privatized, false otherwise.
bool Privatize() { return MappedVars.apply(CGF); }
void ForceCleanup() {
RunCleanupsScope::ForceCleanup();
restoreMap();
}
/// Exit scope - all the mapped variables are restored.
~OMPPrivateScope() {
if (PerformCleanup)
ForceCleanup();
}
/// Checks if the global variable is captured in the current function.
bool isGlobalVarCaptured(const VarDecl *VD) const {
VD = VD->getCanonicalDecl();
return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0;
}
/// Restore all mapped variables without cleanup. This is useful when we
/// want to reference the original variables but don't want the cleanup,
/// because it could emit lifetime ends too early, causing backend issue
/// #56913.
void restoreMap() { MappedVars.restore(CGF); }
};
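// Illustrative two-phase use (sketch; 'VD' and 'PrivateAddr' are
// hypothetical):
//
//   OMPPrivateScope PrivScope(CGF);
//   (void)PrivScope.addPrivate(VD, PrivateAddr); // registration only
//   // ... emit initializers that still see the original variables ...
//   (void)PrivScope.Privatize();                 // switch to private copies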
/// Save/restore the original map of previously emitted local vars in case
/// we need to duplicate emission of the same code several times in the same
/// function for OpenMP code.
class OMPLocalDeclMapRAII {
CodeGenFunction &CGF;
DeclMapTy SavedMap;
public:
OMPLocalDeclMapRAII(CodeGenFunction &CGF)
: CGF(CGF), SavedMap(CGF.LocalDeclMap) {}
~OMPLocalDeclMapRAII() { SavedMap.swap(CGF.LocalDeclMap); }
};
/// Takes the old cleanup stack size and emits the cleanup blocks
/// that have been added.
void
PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize,
std::initializer_list<llvm::Value **> ValuesToReload = {});
/// Takes the old cleanup stack size and emits the cleanup blocks
/// that have been added, then adds all lifetime-extended cleanups from
/// the given position to the stack.
void
PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize,
size_t OldLifetimeExtendedStackSize,
std::initializer_list<llvm::Value **> ValuesToReload = {});
void ResolveBranchFixups(llvm::BasicBlock *Target);
/// The given basic block lies in the current EH scope, but may be a
/// target of a potentially scope-crossing jump; get a stable handle
/// to which we can perform this jump later.
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target) {
return JumpDest(Target,
EHStack.getInnermostNormalCleanup(),
NextCleanupDestIndex++);
}
/// The given basic block lies in the current EH scope, but may be a
/// target of a potentially scope-crossing jump; get a stable handle
/// to which we can perform this jump later.
JumpDest getJumpDestInCurrentScope(StringRef Name = StringRef()) {
return getJumpDestInCurrentScope(createBasicBlock(Name));
}
/// EmitBranchThroughCleanup - Emit a branch from the current insert
/// block through the normal cleanup handling code (if any) and then
/// on to \arg Dest.
void EmitBranchThroughCleanup(JumpDest Dest);
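// Illustrative use (sketch only): create a destination and branch to it
// through any intervening cleanups:
//
//   JumpDest Dest = getJumpDestInCurrentScope("jump.target");
//   EmitBranchThroughCleanup(Dest); // runs normal cleanups along the path
//   EmitBlock(Dest.getBlock());     // continue at the destination block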
/// isObviouslyBranchWithoutCleanups - Return true if a branch to the
/// specified destination obviously has no cleanups to run. 'false' is always
/// a conservatively correct answer for this method.
bool isObviouslyBranchWithoutCleanups(JumpDest Dest) const;
/// popCatchScope - Pops the catch scope at the top of the EHScope
/// stack, emitting any required code (other than the catch handlers
/// themselves).
void popCatchScope();
llvm::BasicBlock *getEHResumeBlock(bool isCleanup);
llvm::BasicBlock *getEHDispatchBlock(EHScopeStack::stable_iterator scope);
llvm::BasicBlock *
getFuncletEHDispatchBlock(EHScopeStack::stable_iterator scope);
/// An object to manage conditionally-evaluated expressions.
class ConditionalEvaluation {
llvm::BasicBlock *StartBB;
public:
ConditionalEvaluation(CodeGenFunction &CGF)
: StartBB(CGF.Builder.GetInsertBlock()) {}
void begin(CodeGenFunction &CGF) {
assert(CGF.OutermostConditional != this);
if (!CGF.OutermostConditional)
CGF.OutermostConditional = this;
}
void end(CodeGenFunction &CGF) {
assert(CGF.OutermostConditional != nullptr);
if (CGF.OutermostConditional == this)
CGF.OutermostConditional = nullptr;
}
/// Returns a block which will be executed prior to each
/// evaluation of the conditional code.
llvm::BasicBlock *getStartingBlock() const {
return StartBB;
}
};
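// Illustrative use (sketch only) around one arm of a conditional
// expression:
//
//   ConditionalEvaluation Cond(CGF);
//   Cond.begin(CGF);
//   // ... emit the conditionally-evaluated branch ...
//   Cond.end(CGF);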
/// isInConditionalBranch - Return true if we're currently emitting
/// one branch or the other of a conditional expression.
bool isInConditionalBranch() const { return OutermostConditional != nullptr; }
void setBeforeOutermostConditional(llvm::Value *value, Address addr,
CodeGenFunction &CGF) {
assert(isInConditionalBranch());
llvm::BasicBlock *block = OutermostConditional->getStartingBlock();
auto store =
new llvm::StoreInst(value, addr.emitRawPointer(CGF), &block->back());
store->setAlignment(addr.getAlignment().getAsAlign());
}
/// An RAII object to record that we're evaluating a statement
/// expression.
class StmtExprEvaluation {
CodeGenFunction &CGF;
/// We have to save the outermost conditional: cleanups in a
/// statement expression aren't conditional just because the
/// StmtExpr is.
ConditionalEvaluation *SavedOutermostConditional;
public:
StmtExprEvaluation(CodeGenFunction &CGF)
: CGF(CGF), SavedOutermostConditional(CGF.OutermostConditional) {
CGF.OutermostConditional = nullptr;
}
~StmtExprEvaluation() {
CGF.OutermostConditional = SavedOutermostConditional;
CGF.EnsureInsertPoint();
}
};
/// An object which temporarily prevents a value from being
/// destroyed by aggressive peephole optimizations that assume that
/// all uses of a value have been realized in the IR.
class PeepholeProtection {
llvm::Instruction *Inst = nullptr;
friend class CodeGenFunction;
public:
PeepholeProtection() = default;
};
/// A non-RAII class containing all the information about a bound
/// opaque value. OpaqueValueMapping, below, is a RAII wrapper for
/// this which makes individual mappings very simple; using this
/// class directly is useful when you have a variable number of
/// opaque values or don't want the RAII functionality for some
/// reason.
class OpaqueValueMappingData {
const OpaqueValueExpr *OpaqueValue;
bool BoundLValue;
CodeGenFunction::PeepholeProtection Protection;
OpaqueValueMappingData(const OpaqueValueExpr *ov,
bool boundLValue)
: OpaqueValue(ov), BoundLValue(boundLValue) {}
public:
OpaqueValueMappingData() : OpaqueValue(nullptr) {}
static bool shouldBindAsLValue(const Expr *expr) {
// gl-values should be bound as l-values for obvious reasons.
// Records should be bound as l-values because IR generation
// always keeps them in memory. Expressions of function type
// act exactly like l-values but are formally required to be
// r-values in C.
return expr->isGLValue() ||
expr->getType()->isFunctionType() ||
hasAggregateEvaluationKind(expr->getType());
}
static OpaqueValueMappingData bind(CodeGenFunction &CGF,
const OpaqueValueExpr *ov,
const Expr *e) {
if (shouldBindAsLValue(ov))
return bind(CGF, ov, CGF.EmitLValue(e));
return bind(CGF, ov, CGF.EmitAnyExpr(e));
}
static OpaqueValueMappingData bind(CodeGenFunction &CGF,
const OpaqueValueExpr *ov,
const LValue &lv) {
assert(shouldBindAsLValue(ov));
CGF.OpaqueLValues.insert(std::make_pair(ov, lv));
return OpaqueValueMappingData(ov, true);
}
static OpaqueValueMappingData bind(CodeGenFunction &CGF,
const OpaqueValueExpr *ov,
const RValue &rv) {
assert(!shouldBindAsLValue(ov));
CGF.OpaqueRValues.insert(std::make_pair(ov, rv));
OpaqueValueMappingData data(ov, false);
// Work around an extremely aggressive peephole optimization in
// EmitScalarConversion which assumes that all other uses of a
// value are extant.
data.Protection = CGF.protectFromPeepholes(rv);
return data;
}
bool isValid() const { return OpaqueValue != nullptr; }
void clear() { OpaqueValue = nullptr; }
void unbind(CodeGenFunction &CGF) {
assert(OpaqueValue && "no data to unbind!");
if (BoundLValue) {
CGF.OpaqueLValues.erase(OpaqueValue);
} else {
CGF.OpaqueRValues.erase(OpaqueValue);
CGF.unprotectFromPeepholes(Protection);
}
}
};
/// An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
class OpaqueValueMapping {
CodeGenFunction &CGF;
OpaqueValueMappingData Data;
public:
static bool shouldBindAsLValue(const Expr *expr) {
return OpaqueValueMappingData::shouldBindAsLValue(expr);
}
/// Build the opaque value mapping for the given conditional
/// operator if it's the GNU ?: extension. This is a common
/// enough pattern that the convenience operator is really
/// helpful.
///
OpaqueValueMapping(CodeGenFunction &CGF,
const AbstractConditionalOperator *op) : CGF(CGF) {
if (isa<ConditionalOperator>(op))
// Leave Data empty.
return;
const BinaryConditionalOperator *e = cast<BinaryConditionalOperator>(op);
Data = OpaqueValueMappingData::bind(CGF, e->getOpaqueValue(),
e->getCommon());
}
/// Build the opaque value mapping for an OpaqueValueExpr whose source
/// expression is set to the expression the OVE represents.
OpaqueValueMapping(CodeGenFunction &CGF, const OpaqueValueExpr *OV)
: CGF(CGF) {
if (OV) {
assert(OV->getSourceExpr() && "wrong form of OpaqueValueMapping used "
"for OVE with no source expression");
Data = OpaqueValueMappingData::bind(CGF, OV, OV->getSourceExpr());
}
}
OpaqueValueMapping(CodeGenFunction &CGF,
const OpaqueValueExpr *opaqueValue,
LValue lvalue)
: CGF(CGF), Data(OpaqueValueMappingData::bind(CGF, opaqueValue, lvalue)) {
}
OpaqueValueMapping(CodeGenFunction &CGF,
const OpaqueValueExpr *opaqueValue,
RValue rvalue)
: CGF(CGF), Data(OpaqueValueMappingData::bind(CGF, opaqueValue, rvalue)) {
}
void pop() {
Data.unbind(CGF);
Data.clear();
}
~OpaqueValueMapping() {
if (Data.isValid()) Data.unbind(CGF);
}
};
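// Illustrative use (sketch only) for the GNU binary conditional 'x ?: y',
// where 'E' is a hypothetical AbstractConditionalOperator*:
//
//   OpaqueValueMapping Binding(CGF, E);
//   llvm::Value *V = CGF.EmitScalarExpr(E);
//   // the mapping is unbound automatically when Binding is destroyed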
private:
CGDebugInfo *DebugInfo;
/// Used to create unique names for artificial VLA size debug info variables.
unsigned VLAExprCounter = 0;
bool DisableDebugInfo = false;
/// DidCallStackSave - Whether llvm.stacksave has been called. Used to avoid
/// calling llvm.stacksave for multiple VLAs in the same scope.
bool DidCallStackSave = false;
/// IndirectBranch - The first time an indirect goto is seen we create a block
/// with an indirect branch. Every time we see the address of a label taken,
/// we add the label to the indirect goto. Every subsequent indirect goto is
/// codegen'd as a jump to the IndirectBranch's basic block.
llvm::IndirectBrInst *IndirectBranch = nullptr;
/// LocalDeclMap - This keeps track of the LLVM allocas or globals for local C
/// decls.
DeclMapTy LocalDeclMap;
// Keep track of the cleanups for callee-destructed parameters pushed to the
// cleanup stack so that they can be deactivated later.
llvm::DenseMap<const ParmVarDecl *, EHScopeStack::stable_iterator>
CalleeDestructedParamCleanups;
/// SizeArguments - If a ParmVarDecl had the pass_object_size attribute, this
/// will contain a mapping from said ParmVarDecl to its implicit "object_size"
/// parameter.
llvm::SmallDenseMap<const ParmVarDecl *, const ImplicitParamDecl *, 2>
SizeArguments;
/// Track escaped local variables with auto storage. Used during SEH
/// outlining to produce a call to llvm.localescape.
llvm::DenseMap<llvm::AllocaInst *, int> EscapedLocals;
/// LabelMap - This keeps track of the LLVM basic block for each C label.
llvm::DenseMap<const LabelDecl*, JumpDest> LabelMap;
// BreakContinueStack - This keeps track of where break and continue
// statements should jump to.
struct BreakContinue {
BreakContinue(JumpDest Break, JumpDest Continue)
: BreakBlock(Break), ContinueBlock(Continue) {}
JumpDest BreakBlock;
JumpDest ContinueBlock;
};
SmallVector<BreakContinue, 8> BreakContinueStack;
/// Handles cancellation exit points in OpenMP-related constructs.
class OpenMPCancelExitStack {
/// Tracks cancellation exit point and join point for cancel-related exit
/// and normal exit.
struct CancelExit {
CancelExit() = default;
CancelExit(OpenMPDirectiveKind Kind, JumpDest ExitBlock,
JumpDest ContBlock)
: Kind(Kind), ExitBlock(ExitBlock), ContBlock(ContBlock) {}
OpenMPDirectiveKind Kind = llvm::omp::OMPD_unknown;
/// true if the exit block has been emitted already by the special
/// emitExit() call, false if the default codegen is used.
bool HasBeenEmitted = false;
JumpDest ExitBlock;
JumpDest ContBlock;
};
SmallVector<CancelExit, 8> Stack;
public:
OpenMPCancelExitStack() : Stack(1) {}
~OpenMPCancelExitStack() = default;
/// Fetches the exit block for the current OpenMP construct.
JumpDest getExitBlock() const { return Stack.back().ExitBlock; }
/// Emits the exit block with a codegen procedure specific to the related
/// OpenMP construct, plus code for normal construct cleanup.
void emitExit(CodeGenFunction &CGF, OpenMPDirectiveKind Kind,
const llvm::function_ref<void(CodeGenFunction &)> CodeGen) {
if (Stack.back().Kind == Kind && getExitBlock().isValid()) {
assert(CGF.getOMPCancelDestination(Kind).isValid());
assert(CGF.HaveInsertPoint());
assert(!Stack.back().HasBeenEmitted);
auto IP = CGF.Builder.saveAndClearIP();
CGF.EmitBlock(Stack.back().ExitBlock.getBlock());
CodeGen(CGF);
CGF.EmitBranch(Stack.back().ContBlock.getBlock());
CGF.Builder.restoreIP(IP);
Stack.back().HasBeenEmitted = true;
}
CodeGen(CGF);
}
/// Enter the cancel-supporting construct \a Kind.
/// \param Kind OpenMP directive that supports cancel constructs.
/// \param HasCancel true if the construct has an inner cancel directive,
/// false otherwise.
void enter(CodeGenFunction &CGF, OpenMPDirectiveKind Kind, bool HasCancel) {
Stack.push_back({Kind,
HasCancel ? CGF.getJumpDestInCurrentScope("cancel.exit")
: JumpDest(),
HasCancel ? CGF.getJumpDestInCurrentScope("cancel.cont")
: JumpDest()});
}
/// Emits the default exit point for the cancel construct (if the special
/// one has not been used) + the join point for cancel/normal exits.
void exit(CodeGenFunction &CGF) {
if (getExitBlock().isValid()) {
assert(CGF.getOMPCancelDestination(Stack.back().Kind).isValid());
bool HaveIP = CGF.HaveInsertPoint();
if (!Stack.back().HasBeenEmitted) {
if (HaveIP)
CGF.EmitBranchThroughCleanup(Stack.back().ContBlock);
CGF.EmitBlock(Stack.back().ExitBlock.getBlock());
CGF.EmitBranchThroughCleanup(Stack.back().ContBlock);
}
CGF.EmitBlock(Stack.back().ContBlock.getBlock());
if (!HaveIP) {
CGF.Builder.CreateUnreachable();
CGF.Builder.ClearInsertionPoint();
}
}
Stack.pop_back();
}
};
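// Illustrative protocol (sketch only) around a cancellable construct:
//
//   OMPCancelStack.enter(CGF, llvm::omp::OMPD_parallel, /*HasCancel=*/true);
//   // ... emit the construct body; 'cancel' branches to the exit block ...
//   OMPCancelStack.exit(CGF);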
OpenMPCancelExitStack OMPCancelStack;
/// Lower the likelihood knowledge about \p Cond via the llvm.expect
/// intrinsic.
llvm::Value *emitCondLikelihoodViaExpectIntrinsic(llvm::Value *Cond,
Stmt::Likelihood LH);
CodeGenPGO PGO;
/// Bitmap used by MC/DC to track condition outcomes of a boolean expression.
Address MCDCCondBitmapAddr = Address::invalid();
/// Calculate branch weights appropriate for PGO data
llvm::MDNode *createProfileWeights(uint64_t TrueCount,
uint64_t FalseCount) const;
llvm::MDNode *createProfileWeights(ArrayRef<uint64_t> Weights) const;
llvm::MDNode *createProfileWeightsForLoop(const Stmt *Cond,
uint64_t LoopCount) const;
public:
/// Increment the profiler's counter for the given statement by \p StepV.
/// If \p StepV is null, the default increment is 1.
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV = nullptr) {
if (CGM.getCodeGenOpts().hasProfileClangInstr() &&
!CurFn->hasFnAttribute(llvm::Attribute::NoProfile) &&
!CurFn->hasFnAttribute(llvm::Attribute::SkipProfile)) {
auto AL = ApplyDebugLocation::CreateArtificial(*this);
PGO.emitCounterSetOrIncrement(Builder, S, StepV);
}
PGO.setCurrentStmt(S);
}
bool isMCDCCoverageEnabled() const {
return (CGM.getCodeGenOpts().hasProfileClangInstr() &&
CGM.getCodeGenOpts().MCDCCoverage &&
!CurFn->hasFnAttribute(llvm::Attribute::NoProfile));
}
/// Allocate a temp value on the stack that MCDC can use to track condition
/// results.
void maybeCreateMCDCCondBitmap() {
if (isMCDCCoverageEnabled()) {
PGO.emitMCDCParameters(Builder);
MCDCCondBitmapAddr =
CreateIRTemp(getContext().UnsignedIntTy, "mcdc.addr");
}
}
bool isBinaryLogicalOp(const Expr *E) const {
const BinaryOperator *BOp = dyn_cast<BinaryOperator>(E->IgnoreParens());
return (BOp && BOp->isLogicalOp());
}
/// Zero-init the MCDC temp value.
void maybeResetMCDCCondBitmap(const Expr *E) {
if (isMCDCCoverageEnabled() && isBinaryLogicalOp(E)) {
PGO.emitMCDCCondBitmapReset(Builder, E, MCDCCondBitmapAddr);
PGO.setCurrentStmt(E);
}
}
/// Update the MCDC test-vector bitmap for the boolean expression \p E once
/// its conditions have been evaluated.
void maybeUpdateMCDCTestVectorBitmap(const Expr *E) {
if (isMCDCCoverageEnabled() && isBinaryLogicalOp(E)) {
PGO.emitMCDCTestVectorBitmapUpdate(Builder, E, MCDCCondBitmapAddr, *this);
PGO.setCurrentStmt(E);
}
}
/// Update the MCDC temp value with the condition's evaluated result.
void maybeUpdateMCDCCondBitmap(const Expr *E, llvm::Value *Val) {
if (isMCDCCoverageEnabled()) {
PGO.emitMCDCCondBitmapUpdate(Builder, E, MCDCCondBitmapAddr, Val, *this);
PGO.setCurrentStmt(E);
}
}
/// Get the profiler's count for the given statement.
uint64_t getProfileCount(const Stmt *S) {
return PGO.getStmtCount(S).value_or(0);
}
/// Set the profiler's current count.
void setCurrentProfileCount(uint64_t Count) {
PGO.setCurrentRegionCount(Count);
}
/// Get the profiler's current count. This is generally the count for the most
/// recently incremented counter.
uint64_t getCurrentProfileCount() {
return PGO.getCurrentRegionCount();
}
private:
/// SwitchInsn - This is the nearest enclosing switch instruction. It is
/// null if the current context is not in a switch.
llvm::SwitchInst *SwitchInsn = nullptr;
/// The branch weights of SwitchInsn when doing instrumentation based PGO.
SmallVector<uint64_t, 16> *SwitchWeights = nullptr;
/// The likelihood attributes of the SwitchCase.
SmallVector<Stmt::Likelihood, 16> *SwitchLikelihood = nullptr;
/// CaseRangeBlock - This block holds the condition check for the last case
/// statement range in the current switch instruction.
llvm::BasicBlock *CaseRangeBlock = nullptr;
/// OpaqueLValues - Keeps track of the current set of opaque value
/// expressions.
llvm::DenseMap<const OpaqueValueExpr *, LValue> OpaqueLValues;
llvm::DenseMap<const OpaqueValueExpr *, RValue> OpaqueRValues;
// VLASizeMap - This keeps track of the associated size for each VLA type.
// We track this by the size expression rather than the type itself because
// in certain situations, like a const qualifier applied to a VLA typedef,
// multiple VLA types can share the same size expression.
// FIXME: Maybe this could be a stack of maps that is pushed/popped as we
// enter/leave scopes.
llvm::DenseMap<const Expr*, llvm::Value*> VLASizeMap;
/// A block containing a single 'unreachable' instruction. Created
/// lazily by getUnreachableBlock().
llvm::BasicBlock *UnreachableBlock = nullptr;
/// Count of the number of return expressions in the function.
unsigned NumReturnExprs = 0;
/// Count the number of simple (constant) return expressions in the function.
unsigned NumSimpleReturnExprs = 0;
/// The last regular (non-return) debug location (breakpoint) in the function.
SourceLocation LastStopPoint;
public:
/// Source location information about the default argument or member
/// initializer expression we're evaluating, if any.
CurrentSourceLocExprScope CurSourceLocExprScope;
using SourceLocExprScopeGuard =
CurrentSourceLocExprScope::SourceLocExprScopeGuard;
/// A scope within which we are constructing the fields of an object which
/// might use a CXXDefaultInitExpr. This stashes away a 'this' value to use
/// if we need to evaluate a CXXDefaultInitExpr within the evaluation.
class FieldConstructionScope {
public:
FieldConstructionScope(CodeGenFunction &CGF, Address This)
: CGF(CGF), OldCXXDefaultInitExprThis(CGF.CXXDefaultInitExprThis) {
CGF.CXXDefaultInitExprThis = This;
}
~FieldConstructionScope() {
CGF.CXXDefaultInitExprThis = OldCXXDefaultInitExprThis;
}
private:
CodeGenFunction &CGF;
Address OldCXXDefaultInitExprThis;
};
/// The scope of a CXXDefaultInitExpr. Within this scope, the value of 'this'
/// is overridden to be the object under construction.
class CXXDefaultInitExprScope {
public:
CXXDefaultInitExprScope(CodeGenFunction &CGF, const CXXDefaultInitExpr *E)
: CGF(CGF), OldCXXThisValue(CGF.CXXThisValue),
OldCXXThisAlignment(CGF.CXXThisAlignment),
SourceLocScope(E, CGF.CurSourceLocExprScope) {
CGF.CXXThisValue = CGF.CXXDefaultInitExprThis.getBasePointer();
CGF.CXXThisAlignment = CGF.CXXDefaultInitExprThis.getAlignment();
}
~CXXDefaultInitExprScope() {
CGF.CXXThisValue = OldCXXThisValue;
CGF.CXXThisAlignment = OldCXXThisAlignment;
}
public:
CodeGenFunction &CGF;
llvm::Value *OldCXXThisValue;
CharUnits OldCXXThisAlignment;
SourceLocExprScopeGuard SourceLocScope;
};
struct CXXDefaultArgExprScope : SourceLocExprScopeGuard {
CXXDefaultArgExprScope(CodeGenFunction &CGF, const CXXDefaultArgExpr *E)
: SourceLocExprScopeGuard(E, CGF.CurSourceLocExprScope) {}
};
/// The scope of an ArrayInitLoopExpr. Within this scope, the value of the
/// current loop index is overridden.
class ArrayInitLoopExprScope {
public:
ArrayInitLoopExprScope(CodeGenFunction &CGF, llvm::Value *Index)
: CGF(CGF), OldArrayInitIndex(CGF.ArrayInitIndex) {
CGF.ArrayInitIndex = Index;
}
~ArrayInitLoopExprScope() {
CGF.ArrayInitIndex = OldArrayInitIndex;
}
private:
CodeGenFunction &CGF;
llvm::Value *OldArrayInitIndex;
};
class InlinedInheritingConstructorScope {
public:
InlinedInheritingConstructorScope(CodeGenFunction &CGF, GlobalDecl GD)
: CGF(CGF), OldCurGD(CGF.CurGD), OldCurFuncDecl(CGF.CurFuncDecl),
OldCurCodeDecl(CGF.CurCodeDecl),
OldCXXABIThisDecl(CGF.CXXABIThisDecl),
OldCXXABIThisValue(CGF.CXXABIThisValue),
OldCXXThisValue(CGF.CXXThisValue),
OldCXXABIThisAlignment(CGF.CXXABIThisAlignment),
OldCXXThisAlignment(CGF.CXXThisAlignment),
OldReturnValue(CGF.ReturnValue), OldFnRetTy(CGF.FnRetTy),
OldCXXInheritedCtorInitExprArgs(
std::move(CGF.CXXInheritedCtorInitExprArgs)) {
CGF.CurGD = GD;
CGF.CurFuncDecl = CGF.CurCodeDecl =
cast<CXXConstructorDecl>(GD.getDecl());
CGF.CXXABIThisDecl = nullptr;
CGF.CXXABIThisValue = nullptr;
CGF.CXXThisValue = nullptr;
CGF.CXXABIThisAlignment = CharUnits();
CGF.CXXThisAlignment = CharUnits();
CGF.ReturnValue = Address::invalid();
CGF.FnRetTy = QualType();
CGF.CXXInheritedCtorInitExprArgs.clear();
}
~InlinedInheritingConstructorScope() {
CGF.CurGD = OldCurGD;
CGF.CurFuncDecl = OldCurFuncDecl;
CGF.CurCodeDecl = OldCurCodeDecl;
CGF.CXXABIThisDecl = OldCXXABIThisDecl;
CGF.CXXABIThisValue = OldCXXABIThisValue;
CGF.CXXThisValue = OldCXXThisValue;
CGF.CXXABIThisAlignment = OldCXXABIThisAlignment;
CGF.CXXThisAlignment = OldCXXThisAlignment;
CGF.ReturnValue = OldReturnValue;
CGF.FnRetTy = OldFnRetTy;
CGF.CXXInheritedCtorInitExprArgs =
std::move(OldCXXInheritedCtorInitExprArgs);
}
private:
CodeGenFunction &CGF;
GlobalDecl OldCurGD;
const Decl *OldCurFuncDecl;
const Decl *OldCurCodeDecl;
ImplicitParamDecl *OldCXXABIThisDecl;
llvm::Value *OldCXXABIThisValue;
llvm::Value *OldCXXThisValue;
CharUnits OldCXXABIThisAlignment;
CharUnits OldCXXThisAlignment;
Address OldReturnValue;
QualType OldFnRetTy;
CallArgList OldCXXInheritedCtorInitExprArgs;
};
// Helper class for the OpenMP IR Builder. Allows reusability of code used
// for region body and finalization codegen callbacks. This class will also
// contain privatization functions used by the privatization callbacks.
//
// TODO: this is a temporary class for things that are being moved out of
// CGOpenMPRuntime, new versions of current CodeGenFunction methods, or
// utility functions for use with the OMPBuilder. Once the move to use the
// OMPBuilder is done, everything here will either become part of
// CodeGenFunction directly, or a new helper class that will contain
// functions used by both this and the OMPBuilder.
struct OMPBuilderCBHelpers {
OMPBuilderCBHelpers() = delete;
OMPBuilderCBHelpers(const OMPBuilderCBHelpers &) = delete;
OMPBuilderCBHelpers &operator=(const OMPBuilderCBHelpers &) = delete;
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
/// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
private:
llvm::CallInst *RTLFnCI;
public:
OMPAllocateCleanupTy(llvm::CallInst *RLFnCI) : RTLFnCI(RLFnCI) {
RLFnCI->removeFromParent();
}
void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
if (!CGF.HaveInsertPoint())
return;
CGF.Builder.Insert(RTLFnCI);
}
};
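// Usage sketch (assumes `FreeCI` is an already-built call to the OpenMP
// runtime's deallocation entry point): the constructor detaches the call from
// its block, and Emit() re-inserts it when the cleanup fires.
//
//   CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);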
/// Returns the address of the threadprivate variable for the current
/// thread. This also creates any necessary OMP runtime calls.
///
/// \param VD VarDecl for the threadprivate variable.
/// \param VDAddr Address of the VarDecl.
/// \param Loc The location where the threadprivate variable was encountered.
static Address getAddrOfThreadPrivate(CodeGenFunction &CGF,
const VarDecl *VD, Address VDAddr,
SourceLocation Loc);
/// Gets the OpenMP-specific address of the local variable \p VD.
static Address getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD);
/// Get the platform-specific name separator.
/// \param Parts different parts of the final name that need separation
/// \param FirstSeparator First separator used between the initial two
/// parts of the name.
/// \param Separator separator used between all of the rest of the
/// consecutive parts of the name
static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
StringRef FirstSeparator = ".",
StringRef Separator = ".");
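// For example, getNameWithSeparators({"p", "x", "y"}, ".", "_") would produce
// "p.x_y": the first separator joins the initial two parts and the second
// joins the remainder.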
/// Emit the Finalization for an OMP region
/// \param CGF The Codegen function this belongs to
/// \param IP Insertion point for generating the finalization code.
static void FinalizeOMPRegion(CodeGenFunction &CGF, InsertPointTy IP) {
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
assert(IP.getBlock()->end() != IP.getPoint() &&
"OpenMP IR Builder should cause terminated block!");
llvm::BasicBlock *IPBB = IP.getBlock();
llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor();
assert(DestBB && "Finalization block should have one successor!");
// erase and replace with cleanup branch.
IPBB->getTerminator()->eraseFromParent();
CGF.Builder.SetInsertPoint(IPBB);
CodeGenFunction::JumpDest Dest = CGF.getJumpDestInCurrentScope(DestBB);
CGF.EmitBranchThroughCleanup(Dest);
}
/// Emit the body of an OMP region
/// \param CGF The Codegen function this belongs to
/// \param RegionBodyStmt The body statement for the OpenMP region being
/// generated
/// \param AllocaIP Where to insert alloca instructions
/// \param CodeGenIP Where to insert the region code
/// \param RegionName Name to be used for new blocks
static void EmitOMPInlinedRegionBody(CodeGenFunction &CGF,
const Stmt *RegionBodyStmt,
InsertPointTy AllocaIP,
InsertPointTy CodeGenIP,
Twine RegionName);
static void EmitCaptureStmt(CodeGenFunction &CGF, InsertPointTy CodeGenIP,
llvm::BasicBlock &FiniBB, llvm::Function *Fn,
ArrayRef<llvm::Value *> Args) {
llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
if (llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator())
CodeGenIPBBTI->eraseFromParent();
CGF.Builder.SetInsertPoint(CodeGenIPBB);
if (Fn->doesNotThrow())
CGF.EmitNounwindRuntimeCall(Fn, Args);
else
CGF.EmitRuntimeCall(Fn, Args);
if (CGF.Builder.saveIP().isSet())
CGF.Builder.CreateBr(&FiniBB);
}
/// Emit the body of an OMP region that will be outlined in
/// OpenMPIRBuilder::finalize().
/// \param CGF The Codegen function this belongs to
/// \param RegionBodyStmt The body statement for the OpenMP region being
/// generated
/// \param AllocaIP Where to insert alloca instructions
/// \param CodeGenIP Where to insert the region code
/// \param RegionName Name to be used for new blocks
static void EmitOMPOutlinedRegionBody(CodeGenFunction &CGF,
const Stmt *RegionBodyStmt,
InsertPointTy AllocaIP,
InsertPointTy CodeGenIP,
Twine RegionName);
/// RAII for preserving necessary info during Outlined region body codegen.
class OutlinedRegionBodyRAII {
llvm::AssertingVH<llvm::Instruction> OldAllocaIP;
CodeGenFunction::JumpDest OldReturnBlock;
CodeGenFunction &CGF;
public:
OutlinedRegionBodyRAII(CodeGenFunction &cgf, InsertPointTy &AllocaIP,
llvm::BasicBlock &RetBB)
: CGF(cgf) {
assert(AllocaIP.isSet() &&
"Must specify Insertion point for allocas of outlined function");
OldAllocaIP = CGF.AllocaInsertPt;
CGF.AllocaInsertPt = &*AllocaIP.getPoint();
OldReturnBlock = CGF.ReturnBlock;
CGF.ReturnBlock = CGF.getJumpDestInCurrentScope(&RetBB);
}
~OutlinedRegionBodyRAII() {
CGF.AllocaInsertPt = OldAllocaIP;
CGF.ReturnBlock = OldReturnBlock;
}
};
/// RAII for preserving necessary info during inlined region body codegen.
class InlinedRegionBodyRAII {
llvm::AssertingVH<llvm::Instruction> OldAllocaIP;
CodeGenFunction &CGF;
public:
InlinedRegionBodyRAII(CodeGenFunction &cgf, InsertPointTy &AllocaIP,
llvm::BasicBlock &FiniBB)
: CGF(cgf) {
// The alloca insertion block should be in the entry block of the containing
// function, so this expects either an unset AllocaIP (in which case the old
// alloca insertion point is reused) or a new AllocaIP in the same block as
// the old one.
assert((!AllocaIP.isSet() ||
CGF.AllocaInsertPt->getParent() == AllocaIP.getBlock()) &&
"Insertion point should be in the entry block of containing "
"function!");
OldAllocaIP = CGF.AllocaInsertPt;
if (AllocaIP.isSet())
CGF.AllocaInsertPt = &*AllocaIP.getPoint();
// TODO: Remove the call, after making sure the counter is not used by
// the EHStack.
// Since this is an inlined region, it should not modify the
// ReturnBlock, and should reuse the one for the enclosing outlined
// region. So, the JumpDest returned by the function is discarded.
(void)CGF.getJumpDestInCurrentScope(&FiniBB);
}
~InlinedRegionBodyRAII() { CGF.AllocaInsertPt = OldAllocaIP; }
};
};
private:
/// CXXABIThisDecl - When generating code for a C++ member function,
/// this will hold the implicit 'this' declaration.
ImplicitParamDecl *CXXABIThisDecl = nullptr;
llvm::Value *CXXABIThisValue = nullptr;
llvm::Value *CXXThisValue = nullptr;
CharUnits CXXABIThisAlignment;
CharUnits CXXThisAlignment;
/// The value of 'this' to use when evaluating CXXDefaultInitExprs within
/// this expression.
Address CXXDefaultInitExprThis = Address::invalid();
/// The current array initialization index when evaluating an
/// ArrayInitIndexExpr within an ArrayInitLoopExpr.
llvm::Value *ArrayInitIndex = nullptr;
/// The values of function arguments to use when evaluating
/// CXXInheritedCtorInitExprs within this context.
CallArgList CXXInheritedCtorInitExprArgs;
/// CXXStructorImplicitParamDecl - When generating code for a constructor or
/// destructor, this will hold the implicit argument (e.g. VTT).
ImplicitParamDecl *CXXStructorImplicitParamDecl = nullptr;
llvm::Value *CXXStructorImplicitParamValue = nullptr;
/// OutermostConditional - Points to the outermost active
/// conditional control. This is used so that we know if a
/// temporary should be destroyed conditionally.
ConditionalEvaluation *OutermostConditional = nullptr;
/// The current lexical scope.
LexicalScope *CurLexicalScope = nullptr;
/// The current source location that should be used for exception
/// handling code.
SourceLocation CurEHLocation;
/// BlockByrefInfos - For each __block variable, contains
/// information about the layout of the variable.
llvm::DenseMap<const ValueDecl *, BlockByrefInfo> BlockByrefInfos;
/// Used by -fsanitize=nullability-return to determine whether the return
/// value can be checked.
llvm::Value *RetValNullabilityPrecondition = nullptr;
/// Check if -fsanitize=nullability-return instrumentation is required for
/// this function.
bool requiresReturnValueNullabilityCheck() const {
return RetValNullabilityPrecondition;
}
/// Used to store precise source locations for return statements by the
/// runtime return value checks.
Address ReturnLocation = Address::invalid();
/// Check if the return value of this function requires sanitization.
bool requiresReturnValueCheck() const;
bool isInAllocaArgument(CGCXXABI &ABI, QualType Ty);
bool hasInAllocaArg(const CXXMethodDecl *MD);
llvm::BasicBlock *TerminateLandingPad = nullptr;
llvm::BasicBlock *TerminateHandler = nullptr;
llvm::SmallVector<llvm::BasicBlock *, 2> TrapBBs;
/// Terminate funclets keyed by parent funclet pad.
llvm::MapVector<llvm::Value *, llvm::BasicBlock *> TerminateFunclets;
/// Largest vector width used in this function. Will be used to create a
/// function attribute.
unsigned LargestVectorWidth = 0;
/// True if we need to emit lifetime markers. This is initially set in
/// the constructor, but could be overwritten to true if this is a coroutine.
bool ShouldEmitLifetimeMarkers;
/// Add OpenCL kernel arg metadata and the kernel attribute metadata to
/// the function metadata.
void EmitKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn);
public:
CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false);
~CodeGenFunction();
CodeGenTypes &getTypes() const { return CGM.getTypes(); }
ASTContext &getContext() const { return CGM.getContext(); }
CGDebugInfo *getDebugInfo() {
if (DisableDebugInfo)
return nullptr;
return DebugInfo;
}
void disableDebugInfo() { DisableDebugInfo = true; }
void enableDebugInfo() { DisableDebugInfo = false; }
bool shouldUseFusedARCCalls() {
return CGM.getCodeGenOpts().OptimizationLevel == 0;
}
const LangOptions &getLangOpts() const { return CGM.getLangOpts(); }
/// Returns a pointer to the function's exception object and selector slot,
/// which is assigned in every landing pad.
Address getExceptionSlot();
Address getEHSelectorSlot();
/// Returns the contents of the function's exception object and selector
/// slots.
llvm::Value *getExceptionFromSlot();
llvm::Value *getSelectorFromSlot();
RawAddress getNormalCleanupDestSlot();
llvm::BasicBlock *getUnreachableBlock() {
if (!UnreachableBlock) {
UnreachableBlock = createBasicBlock("unreachable");
new llvm::UnreachableInst(getLLVMContext(), UnreachableBlock);
}
return UnreachableBlock;
}
llvm::BasicBlock *getInvokeDest() {
if (!EHStack.requiresLandingPad()) return nullptr;
return getInvokeDestImpl();
}
bool currentFunctionUsesSEHTry() const { return !!CurSEHParent; }
const TargetInfo &getTarget() const { return Target; }
llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); }
const TargetCodeGenInfo &getTargetHooks() const {
return CGM.getTargetCodeGenInfo();
}
//===--------------------------------------------------------------------===//
// Cleanups
//===--------------------------------------------------------------------===//
typedef void Destroyer(CodeGenFunction &CGF, Address addr, QualType ty);
void pushIrregularPartialArrayCleanup(llvm::Value *arrayBegin,
Address arrayEndPointer,
QualType elementType,
CharUnits elementAlignment,
Destroyer *destroyer);
void pushRegularPartialArrayCleanup(llvm::Value *arrayBegin,
llvm::Value *arrayEnd,
QualType elementType,
CharUnits elementAlignment,
Destroyer *destroyer);
void pushDestroy(QualType::DestructionKind dtorKind,
Address addr, QualType type);
void pushEHDestroy(QualType::DestructionKind dtorKind,
Address addr, QualType type);
void pushDestroy(CleanupKind kind, Address addr, QualType type,
Destroyer *destroyer, bool useEHCleanupForArray);
void pushDestroyAndDeferDeactivation(QualType::DestructionKind dtorKind,
Address addr, QualType type);
void pushDestroyAndDeferDeactivation(CleanupKind cleanupKind, Address addr,
QualType type, Destroyer *destroyer,
bool useEHCleanupForArray);
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr,
QualType type, Destroyer *destroyer,
bool useEHCleanupForArray);
void pushCallObjectDeleteCleanup(const FunctionDecl *OperatorDelete,
llvm::Value *CompletePtr,
QualType ElementType);
void pushStackRestore(CleanupKind kind, Address SPMem);
void pushKmpcAllocFree(CleanupKind Kind,
std::pair<llvm::Value *, llvm::Value *> AddrSizePair);
void emitDestroy(Address addr, QualType type, Destroyer *destroyer,
bool useEHCleanupForArray);
llvm::Function *generateDestroyHelper(Address addr, QualType type,
Destroyer *destroyer,
bool useEHCleanupForArray,
const VarDecl *VD);
void emitArrayDestroy(llvm::Value *begin, llvm::Value *end,
QualType elementType, CharUnits elementAlign,
Destroyer *destroyer,
bool checkZeroLength, bool useEHCleanup);
Destroyer *getDestroyer(QualType::DestructionKind destructionKind);
/// Determines whether an EH cleanup is required to destroy a type
/// with the given destruction kind.
bool needsEHCleanup(QualType::DestructionKind kind) {
switch (kind) {
case QualType::DK_none:
return false;
case QualType::DK_cxx_destructor:
case QualType::DK_objc_weak_lifetime:
case QualType::DK_nontrivial_c_struct:
return getLangOpts().Exceptions;
case QualType::DK_objc_strong_lifetime:
return getLangOpts().Exceptions &&
CGM.getCodeGenOpts().ObjCAutoRefCountExceptions;
}
llvm_unreachable("bad destruction kind");
}
CleanupKind getCleanupKind(QualType::DestructionKind kind) {
return (needsEHCleanup(kind) ? NormalAndEHCleanup : NormalCleanup);
}
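// For example (a sketch, assuming `T` is the type of a local being destroyed
// and `Addr` its address): a C++ destructor cleanup becomes NormalAndEHCleanup
// only when exceptions are enabled.
//
//   if (QualType::DestructionKind DK = T.isDestructedType())
//     pushDestroy(getCleanupKind(DK), Addr, T, getDestroyer(DK),
//                 /*useEHCleanupForArray=*/true);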
//===--------------------------------------------------------------------===//
// Objective-C
//===--------------------------------------------------------------------===//
void GenerateObjCMethod(const ObjCMethodDecl *OMD);
void StartObjCMethod(const ObjCMethodDecl *MD, const ObjCContainerDecl *CD);
/// GenerateObjCGetter - Synthesize an Objective-C property getter function.
void GenerateObjCGetter(ObjCImplementationDecl *IMP,
const ObjCPropertyImplDecl *PID);
void generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
const ObjCPropertyImplDecl *propImpl,
const ObjCMethodDecl *GetterMethodDecl,
llvm::Constant *AtomicHelperFn);
void GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP,
ObjCMethodDecl *MD, bool ctor);
/// GenerateObjCSetter - Synthesize an Objective-C property setter function
/// for the given property.
void GenerateObjCSetter(ObjCImplementationDecl *IMP,
const ObjCPropertyImplDecl *PID);
void generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
const ObjCPropertyImplDecl *propImpl,
llvm::Constant *AtomicHelperFn);
//===--------------------------------------------------------------------===//
// Block Bits
//===--------------------------------------------------------------------===//
/// Emit block literal.
/// \return an LLVM value which is a pointer to a struct which contains
/// information about the block, including the block invoke function, the
/// captured variables, etc.
llvm::Value *EmitBlockLiteral(const BlockExpr *);
llvm::Function *GenerateBlockFunction(GlobalDecl GD,
const CGBlockInfo &Info,
const DeclMapTy &ldm,
bool IsLambdaConversionToBlock,
bool BuildGlobalBlock);
/// Check if \p T is a C++ class that has a destructor that can throw.
static bool cxxDestructorCanThrow(QualType T);
llvm::Constant *GenerateCopyHelperFunction(const CGBlockInfo &blockInfo);
llvm::Constant *GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo);
llvm::Constant *GenerateObjCAtomicSetterCopyHelperFunction(
const ObjCPropertyImplDecl *PID);
llvm::Constant *GenerateObjCAtomicGetterCopyHelperFunction(
const ObjCPropertyImplDecl *PID);
llvm::Value *EmitBlockCopyAndAutorelease(llvm::Value *Block, QualType Ty);
void BuildBlockRelease(llvm::Value *DeclPtr, BlockFieldFlags flags,
bool CanThrow);
class AutoVarEmission;
void emitByrefStructureInit(const AutoVarEmission &emission);
/// Enter a cleanup to destroy a __block variable. Note that this
/// cleanup should be a no-op if the variable hasn't left the stack
/// yet; if a cleanup is required for the variable itself, that needs
/// to be done externally.
///
/// \param Kind Cleanup kind.
///
/// \param Addr When \p LoadBlockVarAddr is false, the address of the __block
/// structure that will be passed to _Block_object_dispose. When
/// \p LoadBlockVarAddr is true, the address of the field of the block
/// structure that holds the address of the __block structure.
///
/// \param Flags The flag that will be passed to _Block_object_dispose.
///
/// \param LoadBlockVarAddr Indicates whether we need to emit a load from
/// \p Addr to get the address of the __block structure.
void enterByrefCleanup(CleanupKind Kind, Address Addr, BlockFieldFlags Flags,
bool LoadBlockVarAddr, bool CanThrow);
void setBlockContextParameter(const ImplicitParamDecl *D, unsigned argNum,
llvm::Value *ptr);
Address LoadBlockStruct();
Address GetAddrOfBlockDecl(const VarDecl *var);
/// emitBlockByrefAddress - Computes the location of the
/// data in a variable which is declared as __block.
Address emitBlockByrefAddress(Address baseAddr, const VarDecl *V,
bool followForward = true);
Address emitBlockByrefAddress(Address baseAddr,
const BlockByrefInfo &info,
bool followForward,
const llvm::Twine &name);
const BlockByrefInfo &getBlockByrefInfo(const VarDecl *var);
QualType BuildFunctionArgList(GlobalDecl GD, FunctionArgList &Args);
void GenerateCode(GlobalDecl GD, llvm::Function *Fn,
const CGFunctionInfo &FnInfo);
/// Annotate the function with an attribute that disables TSan checking at
/// runtime.
void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn);
/// Emit code for the start of a function.
/// \param Loc The location to be associated with the function.
/// \param StartLoc The location of the function body.
void StartFunction(GlobalDecl GD,
QualType RetTy,
llvm::Function *Fn,
const CGFunctionInfo &FnInfo,
const FunctionArgList &Args,
SourceLocation Loc = SourceLocation(),
SourceLocation StartLoc = SourceLocation());
static bool IsConstructorDelegationValid(const CXXConstructorDecl *Ctor);
void EmitConstructorBody(FunctionArgList &Args);
void EmitDestructorBody(FunctionArgList &Args);
void emitImplicitAssignmentOperatorBody(FunctionArgList &Args);
void EmitFunctionBody(const Stmt *Body);
void EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S);
void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator,
CallArgList &CallArgs,
const CGFunctionInfo *CallOpFnInfo = nullptr,
llvm::Constant *CallOpFn = nullptr);
void EmitLambdaBlockInvokeBody();
void EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD);
void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD,
CallArgList &CallArgs);
void EmitLambdaInAllocaImplFn(const CXXMethodDecl *CallOp,
const CGFunctionInfo **ImplFnInfo,
llvm::Function **ImplFn);
void EmitLambdaInAllocaCallOpBody(const CXXMethodDecl *MD);
void EmitLambdaVLACapture(const VariableArrayType *VAT, LValue LV) {
EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV);
}
void EmitAsanPrologueOrEpilogue(bool Prologue);
/// Emit the unified return block, trying to avoid its emission when
/// possible.
/// \return The debug location of the user written return statement if the
/// return block is avoided.
llvm::DebugLoc EmitReturnBlock();
/// FinishFunction - Complete IR generation of the current function. It is
/// legal to call this function even if there is no current insertion point.
void FinishFunction(SourceLocation EndLoc=SourceLocation());
void StartThunk(llvm::Function *Fn, GlobalDecl GD,
const CGFunctionInfo &FnInfo, bool IsUnprototyped);
void EmitCallAndReturnForThunk(llvm::FunctionCallee Callee,
const ThunkInfo *Thunk, bool IsUnprototyped);
void FinishThunk();
/// Emit a musttail call for a thunk with a potentially adjusted this pointer.
void EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr,
llvm::FunctionCallee Callee);
/// Generate a thunk for the given method.
void generateThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo,
GlobalDecl GD, const ThunkInfo &Thunk,
bool IsUnprototyped);
llvm::Function *GenerateVarArgsThunk(llvm::Function *Fn,
const CGFunctionInfo &FnInfo,
GlobalDecl GD, const ThunkInfo &Thunk);
void EmitCtorPrologue(const CXXConstructorDecl *CD, CXXCtorType Type,
FunctionArgList &Args);
void EmitInitializerForField(FieldDecl *Field, LValue LHS, Expr *Init);
/// Struct with all information about dynamic [sub]class needed to set vptr.
struct VPtr {
BaseSubobject Base;
const CXXRecordDecl *NearestVBase;
CharUnits OffsetFromNearestVBase;
const CXXRecordDecl *VTableClass;
};
/// Initialize the vtable pointer of the given subobject.
void InitializeVTablePointer(const VPtr &vptr);
typedef llvm::SmallVector<VPtr, 4> VPtrsVector;
typedef llvm::SmallPtrSet<const CXXRecordDecl *, 4> VisitedVirtualBasesSetTy;
VPtrsVector getVTablePointers(const CXXRecordDecl *VTableClass);
void getVTablePointers(BaseSubobject Base, const CXXRecordDecl *NearestVBase,
CharUnits OffsetFromNearestVBase,
bool BaseIsNonVirtualPrimaryBase,
const CXXRecordDecl *VTableClass,
VisitedVirtualBasesSetTy &VBases, VPtrsVector &vptrs);
void InitializeVTablePointers(const CXXRecordDecl *ClassDecl);
// VTableAuthMode - whether loading the vtable is guaranteed to trap on
// authentication failure, even if the resulting vtable pointer is unused.
enum class VTableAuthMode {
Authenticate,
MustTrap,
UnsafeUbsanStrip // Should only be used for Vptr UBSan check
};
/// GetVTablePtr - Return the Value of the vtable pointer member pointed
/// to by This.
llvm::Value *
GetVTablePtr(Address This, llvm::Type *VTableTy,
const CXXRecordDecl *VTableClass,
VTableAuthMode AuthMode = VTableAuthMode::Authenticate);
enum CFITypeCheckKind {
CFITCK_VCall,
CFITCK_NVCall,
CFITCK_DerivedCast,
CFITCK_UnrelatedCast,
CFITCK_ICall,
CFITCK_NVMFCall,
CFITCK_VMFCall,
};
/// Derived is the presumed address of an object of type T after a
/// cast. If T is a polymorphic class type, emit a check that the virtual
/// table for Derived belongs to a class derived from T.
void EmitVTablePtrCheckForCast(QualType T, Address Derived, bool MayBeNull,
CFITypeCheckKind TCK, SourceLocation Loc);
/// EmitVTablePtrCheckForCall - Virtual method MD is being called via VTable.
/// If vptr CFI is enabled, emit a check that VTable is valid.
void EmitVTablePtrCheckForCall(const CXXRecordDecl *RD, llvm::Value *VTable,
CFITypeCheckKind TCK, SourceLocation Loc);
/// EmitVTablePtrCheck - Emit a check that VTable is a valid virtual table for
/// RD using llvm.type.test.
void EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable,
CFITypeCheckKind TCK, SourceLocation Loc);
/// If whole-program virtual table optimization is enabled, emit an assumption
/// that VTable is a member of RD's type identifier. Or, if vptr CFI is
/// enabled, emit a check that VTable is a member of RD's type identifier.
void EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD,
llvm::Value *VTable, SourceLocation Loc);
/// Returns whether we should perform a type checked load when loading a
/// virtual function for virtual calls to members of RD. This is generally
/// true when both vcall CFI and whole-program-vtables are enabled.
bool ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD);
/// Emit a type checked load from the given vtable.
llvm::Value *EmitVTableTypeCheckedLoad(const CXXRecordDecl *RD,
llvm::Value *VTable,
llvm::Type *VTableTy,
uint64_t VTableByteOffset);
/// EnterDtorCleanups - Enter the cleanups necessary to complete the
/// given phase of destruction for a destructor. The end result
/// should call destructors on members and base classes in reverse
/// order of their construction.
void EnterDtorCleanups(const CXXDestructorDecl *Dtor, CXXDtorType Type);
/// ShouldInstrumentFunction - Return true if the current function should be
/// instrumented with __cyg_profile_func_* calls
bool ShouldInstrumentFunction();
/// ShouldSkipSanitizerInstrumentation - Return true if the current function
/// should not be instrumented with sanitizers.
bool ShouldSkipSanitizerInstrumentation();
/// ShouldXRayInstrumentFunction - Return true if the current function should be
/// instrumented with XRay nop sleds.
bool ShouldXRayInstrumentFunction() const;
/// AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit
/// XRay custom event handling calls.
bool AlwaysEmitXRayCustomEvents() const;
/// AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit
/// XRay typed event handling calls.
bool AlwaysEmitXRayTypedEvents() const;
/// Return a type hash constant for a function instrumented by
/// -fsanitize=function.
llvm::ConstantInt *getUBSanFunctionTypeHash(QualType T) const;
/// EmitFunctionProlog - Emit the target specific LLVM code to load the
/// arguments for the given function. This is also responsible for naming the
/// LLVM function arguments.
void EmitFunctionProlog(const CGFunctionInfo &FI,
llvm::Function *Fn,
const FunctionArgList &Args);
/// EmitFunctionEpilog - Emit the target specific LLVM code to return the
/// given temporary.
void EmitFunctionEpilog(const CGFunctionInfo &FI, bool EmitRetDbgLoc,
SourceLocation EndLoc);
/// Emit a test that checks if the return value \p RV is nonnull.
void EmitReturnValueCheck(llvm::Value *RV);
/// EmitStartEHSpec - Emit the start of the exception spec.
void EmitStartEHSpec(const Decl *D);
/// EmitEndEHSpec - Emit the end of the exception spec.
void EmitEndEHSpec(const Decl *D);
/// getTerminateLandingPad - Return a landing pad that just calls terminate.
llvm::BasicBlock *getTerminateLandingPad();
/// getTerminateFunclet - Return a cleanup funclet that just calls
/// terminate.
llvm::BasicBlock *getTerminateFunclet();
/// getTerminateHandler - Return a handler (not a landing pad, just
/// a catch handler) that just calls terminate. This is used when
/// a terminate scope encloses a try.
llvm::BasicBlock *getTerminateHandler();
llvm::Type *ConvertTypeForMem(QualType T);
llvm::Type *ConvertType(QualType T);
llvm::Type *convertTypeForLoadStore(QualType ASTTy,
llvm::Type *LLVMTy = nullptr);
llvm::Type *ConvertType(const TypeDecl *T) {
return ConvertType(getContext().getTypeDeclType(T));
}
/// LoadObjCSelf - Load the value of self. This function is only valid while
/// generating code for an Objective-C method.
llvm::Value *LoadObjCSelf();
/// TypeOfSelfObject - Return type of object that this self represents.
QualType TypeOfSelfObject();
/// getEvaluationKind - Return the TypeEvaluationKind of QualType \c T.
static TypeEvaluationKind getEvaluationKind(QualType T);
static bool hasScalarEvaluationKind(QualType T) {
return getEvaluationKind(T) == TEK_Scalar;
}
static bool hasAggregateEvaluationKind(QualType T) {
return getEvaluationKind(T) == TEK_Aggregate;
}
/// createBasicBlock - Create an LLVM basic block.
llvm::BasicBlock *createBasicBlock(const Twine &name = "",
llvm::Function *parent = nullptr,
llvm::BasicBlock *before = nullptr) {
return llvm::BasicBlock::Create(getLLVMContext(), name, parent, before);
}
/// getJumpDestForLabel - Return the JumpDest that the specified
/// label maps to.
JumpDest getJumpDestForLabel(const LabelDecl *S);
/// SimplifyForwardingBlocks - If the given basic block is only a branch to
/// another basic block, simplify it. This assumes that no other code could
/// potentially reference the basic block.
void SimplifyForwardingBlocks(llvm::BasicBlock *BB);
/// EmitBlock - Emit the given block \arg BB and set it as the insert point,
/// adding a fall-through branch from the current insert block if
/// necessary. It is legal to call this function even if there is no current
/// insertion point.
///
/// IsFinished - If true, indicates that the caller has finished emitting
/// branches to the given block and does not expect to emit code into it. This
/// means the block can be ignored if it is unreachable.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false);
/// EmitBlockAfterUses - Emit the given block somewhere hopefully
/// near its uses, and leave the insertion point in it.
void EmitBlockAfterUses(llvm::BasicBlock *BB);
/// EmitBranch - Emit a branch to the specified basic block from the current
/// insert block, taking care to avoid creation of branches from dummy
/// blocks. It is legal to call this function even if there is no current
/// insertion point.
///
/// This function clears the current insertion point. The caller should follow
/// calls to this function with calls to Emit*Block prior to generating new
/// code.
void EmitBranch(llvm::BasicBlock *Block);
/// HaveInsertPoint - True if an insertion point is defined. If not, this
/// indicates that the current code being emitted is unreachable.
bool HaveInsertPoint() const {
return Builder.GetInsertBlock() != nullptr;
}
/// EnsureInsertPoint - Ensure that an insertion point is defined so that
/// emitted IR has a place to go. Note that by definition, if this function
/// creates a block then that block is unreachable; callers may do better to
/// detect when no insertion point is defined and simply skip IR generation.
void EnsureInsertPoint() {
if (!HaveInsertPoint())
EmitBlock(createBasicBlock());
}
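// Typical pattern (sketch; `ContBB` is an illustrative continuation block):
// EmitBranch clears the insertion point, so callers either test
// HaveInsertPoint() and skip further emission, or call EnsureInsertPoint() to
// get a fresh (necessarily unreachable) block before emitting more IR.
//
//   EmitBranch(ContBB);   // insertion point is now cleared
//   EnsureInsertPoint();  // creates an unreachable block to emit into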
/// ErrorUnsupported - Print out an error that codegen doesn't support the
/// specified stmt yet.
void ErrorUnsupported(const Stmt *S, const char *Type);
//===--------------------------------------------------------------------===//
// Helpers
//===--------------------------------------------------------------------===//
Address mergeAddressesInConditionalExpr(Address LHS, Address RHS,
llvm::BasicBlock *LHSBlock,
llvm::BasicBlock *RHSBlock,
llvm::BasicBlock *MergeBlock,
QualType MergedType) {
Builder.SetInsertPoint(MergeBlock);
llvm::PHINode *PtrPhi = Builder.CreatePHI(LHS.getType(), 2, "cond");
PtrPhi->addIncoming(LHS.getBasePointer(), LHSBlock);
PtrPhi->addIncoming(RHS.getBasePointer(), RHSBlock);
LHS.replaceBasePointer(PtrPhi);
LHS.setAlignment(std::min(LHS.getAlignment(), RHS.getAlignment()));
return LHS;
}
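// Sketch of the intended use when both arms of `cond ? a : b` produce an
// address (block and address names are illustrative):
//
//   Address LHS = ...;  // emitted while inserting into LHSBlock
//   Address RHS = ...;  // emitted while inserting into RHSBlock
//   Address Merged = mergeAddressesInConditionalExpr(
//       LHS, RHS, LHSBlock, RHSBlock, MergeBlock, E->getType());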
/// Construct an address with the natural alignment of T. If a pointer to T
/// is expected to be signed, the pointer passed to this function must have
/// been signed, and the returned Address will have the pointer authentication
/// information needed to authenticate the signed pointer.
Address makeNaturalAddressForPointer(
llvm::Value *Ptr, QualType T, CharUnits Alignment = CharUnits::Zero(),
bool ForPointeeType = false, LValueBaseInfo *BaseInfo = nullptr,
TBAAAccessInfo *TBAAInfo = nullptr,
KnownNonNull_t IsKnownNonNull = NotKnownNonNull) {
if (Alignment.isZero())
Alignment =
CGM.getNaturalTypeAlignment(T, BaseInfo, TBAAInfo, ForPointeeType);
return Address(Ptr, ConvertTypeForMem(T), Alignment,
CGM.getPointerAuthInfoForPointeeType(T), /*Offset=*/nullptr,
IsKnownNonNull);
}
LValue MakeAddrLValue(Address Addr, QualType T,
AlignmentSource Source = AlignmentSource::Type) {
return MakeAddrLValue(Addr, T, LValueBaseInfo(Source),
CGM.getTBAAAccessInfo(T));
}
LValue MakeAddrLValue(Address Addr, QualType T, LValueBaseInfo BaseInfo,
TBAAAccessInfo TBAAInfo) {
return LValue::MakeAddr(Addr, T, getContext(), BaseInfo, TBAAInfo);
}
LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment,
AlignmentSource Source = AlignmentSource::Type) {
return MakeAddrLValue(makeNaturalAddressForPointer(V, T, Alignment), T,
LValueBaseInfo(Source), CGM.getTBAAAccessInfo(T));
}
/// Same as MakeAddrLValue above except that the pointer is known to be
/// unsigned.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment,
AlignmentSource Source = AlignmentSource::Type) {
Address Addr(V, ConvertTypeForMem(T), Alignment);
return LValue::MakeAddr(Addr, T, getContext(), LValueBaseInfo(Source),
CGM.getTBAAAccessInfo(T));
}
LValue
MakeAddrLValueWithoutTBAA(Address Addr, QualType T,
AlignmentSource Source = AlignmentSource::Type) {
return LValue::MakeAddr(Addr, T, getContext(), LValueBaseInfo(Source),
TBAAAccessInfo());
}
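// For example (sketch): wrapping a fresh temporary in an l-value that carries
// type-based alignment and TBAA information.
//
//   Address Tmp = CreateMemTemp(T, "tmp");
//   LValue TmpLV = MakeAddrLValue(Tmp, T, AlignmentSource::Type);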
/// Given a value of type T* that may not point to a complete object, construct
/// an l-value with the natural pointee alignment of T.
LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T);
LValue
MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T,
KnownNonNull_t IsKnownNonNull = NotKnownNonNull);
/// Same as MakeNaturalAlignPointeeAddrLValue except that the pointer is known
/// to be unsigned.
LValue MakeNaturalAlignPointeeRawAddrLValue(llvm::Value *V, QualType T);
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T);
Address EmitLoadOfReference(LValue RefLVal,
LValueBaseInfo *PointeeBaseInfo = nullptr,
TBAAAccessInfo *PointeeTBAAInfo = nullptr);
LValue EmitLoadOfReferenceLValue(LValue RefLVal);
LValue EmitLoadOfReferenceLValue(Address RefAddr, QualType RefTy,
AlignmentSource Source =
AlignmentSource::Type) {
LValue RefLVal = MakeAddrLValue(RefAddr, RefTy, LValueBaseInfo(Source),
CGM.getTBAAAccessInfo(RefTy));
return EmitLoadOfReferenceLValue(RefLVal);
}
/// Load a pointer with type \p PtrTy stored at address \p Ptr.
/// Note that \p PtrTy is the type of the loaded pointer, not the address
/// it is loaded from.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy,
LValueBaseInfo *BaseInfo = nullptr,
TBAAAccessInfo *TBAAInfo = nullptr);
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy);
private:
struct AllocaTracker {
void Add(llvm::AllocaInst *I) { Allocas.push_back(I); }
llvm::SmallVector<llvm::AllocaInst *> Take() { return std::move(Allocas); }
private:
llvm::SmallVector<llvm::AllocaInst *> Allocas;
};
AllocaTracker *Allocas = nullptr;
public:
// Captures all the allocas created during the scope of its RAII object.
struct AllocaTrackerRAII {
AllocaTrackerRAII(CodeGenFunction &CGF)
: CGF(CGF), OldTracker(CGF.Allocas) {
CGF.Allocas = &Tracker;
}
~AllocaTrackerRAII() { CGF.Allocas = OldTracker; }
llvm::SmallVector<llvm::AllocaInst *> Take() { return Tracker.Take(); }
private:
CodeGenFunction &CGF;
AllocaTracker *OldTracker;
AllocaTracker Tracker;
};
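// Usage sketch: record every alloca created while emitting a subexpression so
// the caller can post-process them (e.g. add lifetime markers).
//
//   llvm::SmallVector<llvm::AllocaInst *> Allocas;
//   {
//     AllocaTrackerRAII Guard(*this);
//     EmitIgnoredExpr(E);       // allocas created here are recorded
//     Allocas = Guard.Take();
//   } // the previous tracker (if any) is restored here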
/// CreateTempAlloca - This creates an alloca and inserts it into the entry
/// block if \p ArraySize is nullptr, otherwise inserts it at the current
/// insertion point of the builder. The caller is responsible for setting an
/// appropriate alignment on the alloca.
///
/// \p ArraySize is the number of array elements to be allocated if it
/// is not nullptr.
///
/// LangAS::Default is the address space of pointers to local variables and
/// temporaries, as exposed in the source language. In certain
/// configurations, this is not the same as the alloca address space, and a
/// cast is needed to lift the pointer from the alloca AS into
/// LangAS::Default. This can happen when the target uses a restricted
/// address space for the stack but the source language requires
/// LangAS::Default to be a generic address space. The latter condition is
/// common for most programming languages; OpenCL is an exception in that
/// LangAS::Default is the private address space, which naturally maps
/// to the stack.
///
/// Because the address of a temporary is often exposed to the program in
/// various ways, this function will perform the cast. The original alloca
/// instruction is returned through \p Alloca if it is not nullptr.
///
/// The cast is not performed in CreateTempAllocaWithoutCast. This is
/// more efficient if the caller knows that the address will not be exposed.
llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
llvm::Value *ArraySize = nullptr);
RawAddress CreateTempAlloca(llvm::Type *Ty, CharUnits align,
const Twine &Name = "tmp",
llvm::Value *ArraySize = nullptr,
RawAddress *Alloca = nullptr);
RawAddress CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align,
const Twine &Name = "tmp",
llvm::Value *ArraySize = nullptr);
/// CreateDefaultAlignedTempAlloca - This creates an alloca with the
/// default ABI alignment of the given LLVM type.
///
/// IMPORTANT NOTE: This is *not* generally the right alignment for
/// any given AST type that happens to have been lowered to the
/// given IR type. This should only ever be used for function-local,
/// IR-driven manipulations like saving and restoring a value. Do
/// not hand this address off to arbitrary IRGen routines, and especially
/// do not pass it as an argument to a function that might expect a
/// properly ABI-aligned value.
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty,
const Twine &Name = "tmp");
/// CreateIRTemp - Create a temporary IR object of the given type, with
/// appropriate alignment. This routine should only be used when a temporary
/// value needs to be stored into an alloca (for example, to avoid explicit
/// PHI construction), but the type is the IR type, not the type appropriate
/// for storing in memory.
///
/// That is, this is exactly equivalent to CreateMemTemp, but calling
/// ConvertType instead of ConvertTypeForMem.
RawAddress CreateIRTemp(QualType T, const Twine &Name = "tmp");
/// CreateMemTemp - Create a temporary memory object of the given type, with
/// appropriate alignment, and cast it to the default address space. Returns
/// the original alloca instruction through \p Alloca if it is not nullptr.
RawAddress CreateMemTemp(QualType T, const Twine &Name = "tmp",
RawAddress *Alloca = nullptr);
RawAddress CreateMemTemp(QualType T, CharUnits Align,
const Twine &Name = "tmp",
RawAddress *Alloca = nullptr);
/// CreateMemTempWithoutCast - Create a temporary memory object of the given
/// type, with appropriate alignment, without casting it to the default
/// address space.
RawAddress CreateMemTempWithoutCast(QualType T, const Twine &Name = "tmp");
RawAddress CreateMemTempWithoutCast(QualType T, CharUnits Align,
const Twine &Name = "tmp");
/// CreateAggTemp - Create a temporary memory object for the given
/// aggregate type.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name = "tmp",
RawAddress *Alloca = nullptr) {
return AggValueSlot::forAddr(
CreateMemTemp(T, Name, Alloca), T.getQualifiers(),
AggValueSlot::IsNotDestructed, AggValueSlot::DoesNotNeedGCBarriers,
AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap);
}
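// For example (sketch): evaluating an aggregate expression into a fresh
// temporary slot.
//
//   AggValueSlot Slot = CreateAggTemp(E->getType(), "agg.tmp");
//   RValue RV = EmitAnyExpr(E, Slot);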
/// EvaluateExprAsBool - Perform the usual unary conversions on the specified
/// expression and compare the result against zero, returning an Int1Ty value.
llvm::Value *EvaluateExprAsBool(const Expr *E);
/// Retrieve the implicit cast expression of the RHS in a binary operator
/// expression by passing pointers to Value and QualType.
/// This is used for implicit bitfield conversion checks, which
/// must compare with the value before potential truncation.
llvm::Value *EmitWithOriginalRHSBitfieldAssignment(const BinaryOperator *E,
llvm::Value **Previous,
QualType *SrcType);
/// Emit a check for an [implicit] conversion of a bitfield. Such a
/// conversion is not UB, so we use the value after conversion.
void EmitBitfieldConversionCheck(llvm::Value *Src, QualType SrcType,
llvm::Value *Dst, QualType DstType,
const CGBitFieldInfo &Info,
SourceLocation Loc);
/// EmitIgnoredExpr - Emit an expression in a context which ignores the result.
void EmitIgnoredExpr(const Expr *E);
/// EmitAnyExpr - Emit code to compute the specified expression which can have
/// any type. The result is returned as an RValue struct. If this is an
/// aggregate expression, the aggloc/agglocvolatile arguments indicate where
/// the result should be returned.
///
/// \param ignoreResult True if the resulting value isn't used.
RValue EmitAnyExpr(const Expr *E,
AggValueSlot aggSlot = AggValueSlot::ignored(),
bool ignoreResult = false);
// EmitVAListRef - Emit a "reference" to a va_list; this is either the address
// or the value of the expression, depending on how va_list is defined.
Address EmitVAListRef(const Expr *E);
/// Emit a "reference" to a __builtin_ms_va_list; this is
/// always the value of the expression, because a __builtin_ms_va_list is a
/// pointer to a char.
Address EmitMSVAListRef(const Expr *E);
/// EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will
/// always be accessible even if no aggregate location is provided.
RValue EmitAnyExprToTemp(const Expr *E);
/// EmitAnyExprToMem - Emits the code necessary to evaluate an
/// arbitrary expression into the given memory location.
void EmitAnyExprToMem(const Expr *E, Address Location,
Qualifiers Quals, bool IsInitializer);
void EmitAnyExprToExn(const Expr *E, Address Addr);
/// EmitExprAsInit - Emits the code necessary to initialize a
/// location in memory with the given initializer.
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue,
bool capturedByInit);
/// hasVolatileMember - returns true if aggregate type has a volatile
/// member.
bool hasVolatileMember(QualType T) {
if (const RecordType *RT = T->getAs<RecordType>()) {
const RecordDecl *RD = cast<RecordDecl>(RT->getDecl());
return RD->hasVolatileMember();
}
return false;
}
/// Determine whether a return value slot may overlap some other object.
AggValueSlot::Overlap_t getOverlapForReturnValue() {
// FIXME: Assuming no overlap here breaks guaranteed copy elision for base
// class subobjects. These cases may need to be revisited depending on the
// resolution of the relevant core issue.
return AggValueSlot::DoesNotOverlap;
}
/// Determine whether a field initialization may overlap some other object.
AggValueSlot::Overlap_t getOverlapForFieldInit(const FieldDecl *FD);
/// Determine whether a base class initialization may overlap some other
/// object.
AggValueSlot::Overlap_t getOverlapForBaseInit(const CXXRecordDecl *RD,
const CXXRecordDecl *BaseRD,
bool IsVirtual);
/// Emit an aggregate assignment.
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy) {
bool IsVolatile = hasVolatileMember(EltTy);
EmitAggregateCopy(Dest, Src, EltTy, AggValueSlot::MayOverlap, IsVolatile);
}
void EmitAggregateCopyCtor(LValue Dest, LValue Src,
AggValueSlot::Overlap_t MayOverlap) {
EmitAggregateCopy(Dest, Src, Src.getType(), MayOverlap);
}
/// EmitAggregateCopy - Emit an aggregate copy.
///
/// \param isVolatile \c true iff either the source or the destination is
/// volatile.
/// \param MayOverlap Whether the tail padding of the destination might be
/// occupied by some other object. More efficient code can often be
/// generated if not.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy,
AggValueSlot::Overlap_t MayOverlap,
bool isVolatile = false);
/// GetAddrOfLocalVar - Return the address of a local variable.
Address GetAddrOfLocalVar(const VarDecl *VD) {
auto it = LocalDeclMap.find(VD);
assert(it != LocalDeclMap.end() &&
"Invalid argument to GetAddrOfLocalVar(), no decl!");
return it->second;
}
/// Given an opaque value expression, return its LValue mapping if it exists,
/// otherwise create one.
LValue getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e);
/// Given an opaque value expression, return its RValue mapping if it exists,
/// otherwise create one.
RValue getOrCreateOpaqueRValueMapping(const OpaqueValueExpr *e);
/// Get the index of the current ArrayInitLoopExpr, if any.
llvm::Value *getArrayInitIndex() { return ArrayInitIndex; }
/// getAccessedFieldNo - Given an encoded value and a result number, return
/// the input field number being accessed.
static unsigned getAccessedFieldNo(unsigned Idx, const llvm::Constant *Elts);
llvm::BlockAddress *GetAddrOfLabel(const LabelDecl *L);
llvm::BasicBlock *GetIndirectGotoBlock();
/// Check if \p E is a C++ "this" pointer wrapped in value-preserving casts.
static bool IsWrappedCXXThis(const Expr *E);
/// EmitNullInitialization - Generate code to set a value of the given type
/// to null. If the type contains data member pointers, they will be
/// initialized to -1 in accordance with the Itanium C++ ABI.
void EmitNullInitialization(Address DestPtr, QualType Ty);
/// Emits a call to an LLVM variable-argument intrinsic, either
/// \c llvm.va_start or \c llvm.va_end.
/// \param ArgValue A reference to the \c va_list as emitted by either
/// \c EmitVAListRef or \c EmitMSVAListRef.
/// \param IsStart If \c true, emits a call to \c llvm.va_start; otherwise,
/// calls \c llvm.va_end.
llvm::Value *EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart);
/// Generate code to get an argument from the passed in pointer
/// and update it accordingly.
/// \param VE The \c VAArgExpr for which to generate code.
/// \param VAListAddr Receives a reference to the \c va_list as emitted by
/// either \c EmitVAListRef or \c EmitMSVAListRef.
/// \returns A pointer to the argument.
// FIXME: We should be able to get rid of this method and use the va_arg
// instruction in LLVM instead once it works well enough.
RValue EmitVAArg(VAArgExpr *VE, Address &VAListAddr,
AggValueSlot Slot = AggValueSlot::ignored());
/// emitArrayLength - Compute the length of an array, even if it's a
/// VLA, and drill down to the base element type.
llvm::Value *emitArrayLength(const ArrayType *arrayType,
QualType &baseType,
Address &addr);
/// EmitVariablyModifiedType - Capture all the sizes for the VLA expressions
/// in the given variably-modified type and store them in the VLASizeMap.
///
/// This function can be called with a null (unreachable) insert point.
void EmitVariablyModifiedType(QualType Ty);
struct VlaSizePair {
llvm::Value *NumElts;
QualType Type;
VlaSizePair(llvm::Value *NE, QualType T) : NumElts(NE), Type(T) {}
};
/// Return the number of elements for a single dimension
/// for the given array type.
VlaSizePair getVLAElements1D(const VariableArrayType *vla);
VlaSizePair getVLAElements1D(QualType vla);
/// Returns an LLVM value that corresponds to the size,
/// in non-variably-sized elements, of a variable length array type,
/// plus the largest non-variably-sized element type. Assumes that
/// the type has already been emitted with EmitVariablyModifiedType.
VlaSizePair getVLASize(const VariableArrayType *vla);
VlaSizePair getVLASize(QualType vla);
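// For example (a sketch, assuming `int a[n][m]` whose type `Ty` has already
// been emitted with EmitVariablyModifiedType):
//
//   VlaSizePair VlaSize = getVLASize(getContext().getAsVariableArrayType(Ty));
//   // VlaSize.NumElts holds n * m as an llvm::Value*; VlaSize.Type is 'int'.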
/// LoadCXXThis - Load the value of 'this'. This function is only valid while
/// generating code for a C++ member function.
llvm::Value *LoadCXXThis() {
assert(CXXThisValue && "no 'this' value for this function");
return CXXThisValue;
}
Address LoadCXXThisAddress();
/// LoadCXXVTT - Load the VTT parameter to base constructors/destructors
/// that have virtual bases.
// FIXME: Every place that calls LoadCXXVTT is something
// that needs to be abstracted properly.
llvm::Value *LoadCXXVTT() {
assert(CXXStructorImplicitParamValue && "no VTT value for this function");
return CXXStructorImplicitParamValue;
}
/// GetAddressOfDirectBaseInCompleteClass - Convert the given pointer to a
/// complete class to the given direct base.
Address
GetAddressOfDirectBaseInCompleteClass(Address Value,
const CXXRecordDecl *Derived,
const CXXRecordDecl *Base,
bool BaseIsVirtual);
static bool ShouldNullCheckClassCastValue(const CastExpr *Cast);
/// GetAddressOfBaseClass - This function will add the necessary delta to the
/// load of 'this' and returns address of the base class.
Address GetAddressOfBaseClass(Address Value,
const CXXRecordDecl *Derived,
CastExpr::path_const_iterator PathBegin,
CastExpr::path_const_iterator PathEnd,
bool NullCheckValue, SourceLocation Loc);
Address GetAddressOfDerivedClass(Address Value,
const CXXRecordDecl *Derived,
CastExpr::path_const_iterator PathBegin,
CastExpr::path_const_iterator PathEnd,
bool NullCheckValue);
/// GetVTTParameter - Return the VTT parameter that should be passed to a
/// base constructor/destructor with virtual bases.
/// FIXME: VTTs are Itanium ABI-specific, so the definition should move
/// to ItaniumCXXABI.cpp together with all the references to VTT.
llvm::Value *GetVTTParameter(GlobalDecl GD, bool ForVirtualBase,
bool Delegating);
void EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor,
CXXCtorType CtorType,
const FunctionArgList &Args,
SourceLocation Loc);
// It's important not to confuse this and the previous function. Delegating
// constructors are the C++11 feature. The constructor delegate optimization
// is used to reduce duplication in the base and complete constructors where
// they are substantially the same.
void EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor,
const FunctionArgList &Args);
/// Emit a call to an inheriting constructor (that is, one that invokes a
/// constructor inherited from a base class) by inlining its definition. This
/// is necessary if the ABI does not support forwarding the arguments to the
/// base class constructor (because they're variadic or similar).
void EmitInlinedInheritingCXXConstructorCall(const CXXConstructorDecl *Ctor,
CXXCtorType CtorType,
bool ForVirtualBase,
bool Delegating,
CallArgList &Args);
/// Emit a call to a constructor inherited from a base class, passing the
/// current constructor's arguments along unmodified (without even making
/// a copy).
void EmitInheritedCXXConstructorCall(const CXXConstructorDecl *D,
bool ForVirtualBase, Address This,
bool InheritedFromVBase,
const CXXInheritedCtorInitExpr *E);
void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating,
AggValueSlot ThisAVS, const CXXConstructExpr *E);
void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
bool ForVirtualBase, bool Delegating,
Address This, CallArgList &Args,
AggValueSlot::Overlap_t Overlap,
SourceLocation Loc, bool NewPointerIsChecked);
/// Emit assumption load for all bases. Must be called only on the
/// most-derived class and not while the object is under construction.
void EmitVTableAssumptionLoads(const CXXRecordDecl *ClassDecl, Address This);
/// Emit assumption that vptr load == global vtable.
void EmitVTableAssumptionLoad(const VPtr &vptr, Address This);
void EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D,
Address This, Address Src,
const CXXConstructExpr *E);
void EmitCXXAggrConstructorCall(const CXXConstructorDecl *D,
const ArrayType *ArrayTy,
Address ArrayPtr,
const CXXConstructExpr *E,
bool NewPointerIsChecked,
bool ZeroInitialization = false);
void EmitCXXAggrConstructorCall(const CXXConstructorDecl *D,
llvm::Value *NumElements,
Address ArrayPtr,
const CXXConstructExpr *E,
bool NewPointerIsChecked,
bool ZeroInitialization = false);
static Destroyer destroyCXXObject;
void EmitCXXDestructorCall(const CXXDestructorDecl *D, CXXDtorType Type,
bool ForVirtualBase, bool Delegating, Address This,
QualType ThisTy);
void EmitNewArrayInitializer(const CXXNewExpr *E, QualType elementType,
llvm::Type *ElementTy, Address NewPtr,
llvm::Value *NumElements,
llvm::Value *AllocSizeWithoutCookie);
void EmitCXXTemporary(const CXXTemporary *Temporary, QualType TempType,
Address Ptr);
void EmitSehCppScopeBegin();
void EmitSehCppScopeEnd();
void EmitSehTryScopeBegin();
void EmitSehTryScopeEnd();
llvm::Value *EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr);
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr);
llvm::Value *EmitCXXNewExpr(const CXXNewExpr *E);
void EmitCXXDeleteExpr(const CXXDeleteExpr *E);
void EmitDeleteCall(const FunctionDecl *DeleteFD, llvm::Value *Ptr,
QualType DeleteTy, llvm::Value *NumElements = nullptr,
CharUnits CookieSize = CharUnits());
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type,
const CallExpr *TheCallExpr, bool IsDelete);
llvm::Value *EmitCXXTypeidExpr(const CXXTypeidExpr *E);
llvm::Value *EmitDynamicCast(Address V, const CXXDynamicCastExpr *DCE);
Address EmitCXXUuidofExpr(const CXXUuidofExpr *E);
/// Situations in which we might emit a check for the suitability of a
/// pointer or glvalue. Needs to be kept in sync with ubsan_handlers.cpp in
/// compiler-rt.
enum TypeCheckKind {
/// Checking the operand of a load. Must be suitably sized and aligned.
TCK_Load,
/// Checking the destination of a store. Must be suitably sized and aligned.
TCK_Store,
/// Checking the bound value in a reference binding. Must be suitably sized
/// and aligned, but is not required to refer to an object (until the
/// reference is used), per core issue 453.
TCK_ReferenceBinding,
/// Checking the object expression in a non-static data member access. Must
/// be an object within its lifetime.
TCK_MemberAccess,
/// Checking the 'this' pointer for a call to a non-static member function.
/// Must be an object within its lifetime.
TCK_MemberCall,
/// Checking the 'this' pointer for a constructor call.
TCK_ConstructorCall,
/// Checking the operand of a static_cast to a derived pointer type. Must be
/// null or an object within its lifetime.
TCK_DowncastPointer,
/// Checking the operand of a static_cast to a derived reference type. Must
/// be an object within its lifetime.
TCK_DowncastReference,
/// Checking the operand of a cast to a base object. Must be suitably sized
/// and aligned.
TCK_Upcast,
/// Checking the operand of a cast to a virtual base object. Must be an
/// object within its lifetime.
TCK_UpcastToVirtualBase,
/// Checking the value assigned to a _Nonnull pointer. Must not be null.
TCK_NonnullAssign,
/// Checking the operand of a dynamic_cast or a typeid expression. Must be
/// null or an object within its lifetime.
TCK_DynamicOperation
};
/// Determine whether the pointer type check \p TCK permits null pointers.
static bool isNullPointerAllowed(TypeCheckKind TCK);
/// Determine whether the pointer type check \p TCK requires a vptr check.
static bool isVptrCheckRequired(TypeCheckKind TCK, QualType Ty);
/// Whether any type-checking sanitizers are enabled. If \c false,
/// calls to EmitTypeCheck can be skipped.
bool sanitizePerformTypeCheck() const;
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV,
QualType Type, SanitizerSet SkippedChecks = SanitizerSet(),
llvm::Value *ArraySize = nullptr) {
if (!sanitizePerformTypeCheck())
return;
EmitTypeCheck(TCK, Loc, LV.emitRawPointer(*this), Type, LV.getAlignment(),
SkippedChecks, ArraySize);
}
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, Address Addr,
QualType Type, CharUnits Alignment = CharUnits::Zero(),
SanitizerSet SkippedChecks = SanitizerSet(),
llvm::Value *ArraySize = nullptr) {
if (!sanitizePerformTypeCheck())
return;
EmitTypeCheck(TCK, Loc, Addr.emitRawPointer(*this), Type, Alignment,
SkippedChecks, ArraySize);
}
/// Emit a check that \p V is the address of storage of the
/// appropriate size and alignment for an object of type \p Type
/// (or if ArraySize is provided, for an array of that bound).
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V,
QualType Type, CharUnits Alignment = CharUnits::Zero(),
SanitizerSet SkippedChecks = SanitizerSet(),
llvm::Value *ArraySize = nullptr);
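// For example (sketch): checking the implicit object argument before emitting
// a member call, where `This` is the object address and `ObjectTy` its type.
//
//   EmitTypeCheck(TCK_MemberCall, CE->getExprLoc(), This, ObjectTy);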
/// Emit a check that \p Base points into an array object, which
/// we can access at index \p Index. \p Accessed should be \c false if
/// this expression is used as an lvalue, for instance in "&Arr[Idx]".
void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index,
QualType IndexType, bool Accessed);
void EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound,
llvm::Value *Index, QualType IndexType,
QualType IndexedType, bool Accessed);
// Find a struct's flexible array member and get its offset. It may be
// embedded inside multiple sub-structs, but must still be the last field.
const FieldDecl *
FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD,
const FieldDecl *FAMDecl,
uint64_t &Offset);
/// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns
/// \c nullptr if either the attribute or the field doesn't exist.
const FieldDecl *FindCountedByField(const FieldDecl *FD);
/// Build an expression accessing the "counted_by" field.
llvm::Value *EmitCountedByFieldExpr(const Expr *Base,
const FieldDecl *FAMDecl,
const FieldDecl *CountDecl);
llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre);
ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre);
/// Converts Location to a DebugLoc, if debug information is enabled.
llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location);
/// Get the record field index as represented in debug info.
unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex);
//===--------------------------------------------------------------------===//
// Declaration Emission
//===--------------------------------------------------------------------===//
/// EmitDecl - Emit a declaration.
///
/// This function can be called with a null (unreachable) insert point.
void EmitDecl(const Decl &D);
/// EmitVarDecl - Emit a local variable declaration.
///
/// This function can be called with a null (unreachable) insert point.
void EmitVarDecl(const VarDecl &D);
void EmitScalarInit(const Expr *init, const ValueDecl *D, LValue lvalue,
bool capturedByInit);
typedef void SpecialInitFn(CodeGenFunction &Init, const VarDecl &D,
llvm::Value *Address);
/// Determine whether the given initializer is trivial in the sense
/// that it requires no code to be generated.
bool isTrivialInitializer(const Expr *Init);
/// EmitAutoVarDecl - Emit an auto variable declaration.
///
/// This function can be called with a null (unreachable) insert point.
void EmitAutoVarDecl(const VarDecl &D);
class AutoVarEmission {
friend class CodeGenFunction;
const VarDecl *Variable;
/// The address of the alloca for languages with explicit address space
/// (e.g. OpenCL) or the alloca cast to a generic pointer for address
/// space agnostic languages (e.g. C++). Invalid if the variable was
/// emitted as a global constant.
Address Addr;
llvm::Value *NRVOFlag;
/// True if the variable is a __block variable that is captured by an
/// escaping block.
bool IsEscapingByRef;
/// True if the variable is of aggregate type and has a constant
/// initializer.
bool IsConstantAggregate;
/// Non-null if we should use lifetime annotations.
llvm::Value *SizeForLifetimeMarkers;
/// Address of the original alloca instruction. Invalid if the variable was
/// emitted as a global constant.
RawAddress AllocaAddr;
struct Invalid {};
AutoVarEmission(Invalid)
: Variable(nullptr), Addr(Address::invalid()),
AllocaAddr(RawAddress::invalid()) {}
AutoVarEmission(const VarDecl &variable)
: Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr),
IsEscapingByRef(false), IsConstantAggregate(false),
SizeForLifetimeMarkers(nullptr), AllocaAddr(RawAddress::invalid()) {}
bool wasEmittedAsGlobal() const { return !Addr.isValid(); }
public:
static AutoVarEmission invalid() { return AutoVarEmission(Invalid()); }
bool useLifetimeMarkers() const {
return SizeForLifetimeMarkers != nullptr;
}
llvm::Value *getSizeForLifetimeMarkers() const {
assert(useLifetimeMarkers());
return SizeForLifetimeMarkers;
}
/// Returns the raw, allocated address, which is not necessarily
/// the address of the object itself. It is cast to the default
/// address space for address space agnostic languages.
Address getAllocatedAddress() const {
return Addr;
}
/// Returns the address for the original alloca instruction.
RawAddress getOriginalAllocatedAddress() const { return AllocaAddr; }
/// Returns the address of the object within this declaration.
/// Note that this does not chase the forwarding pointer for
/// __block decls.
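/// For illustration, with
/// \code
///   __block int x; // captured by an escaping block
/// \endcode
/// the object lives in a byref structure reached through a forwarding
/// pointer, which this accessor does not follow.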
Address getObjectAddress(CodeGenFunction &CGF) const {
if (!IsEscapingByRef) return Addr;
return CGF.emitBlockByrefAddress(Addr, Variable, /*forward*/ false);
}
};
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var);
void EmitAutoVarInit(const AutoVarEmission &emission);
void EmitAutoVarCleanups(const AutoVarEmission &emission);
void emitAutoVarTypeCleanup(const AutoVarEmission &emission,
QualType::DestructionKind dtorKind);
/// Emits the alloca and debug information for the size expressions for each
/// dimension of an array. It registers the association of its (1-dimensional)
/// QualTypes and size expression's debug node, so that CGDebugInfo can
/// reference this node when creating the DISubrange object to describe the
/// array types.
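/// For example (illustrative):
/// \code
///   void f(int m, int n) { int a[m][n]; } // two VLA size expressions
/// \endcode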
void EmitAndRegisterVariableArrayDimensions(CGDebugInfo *DI,
const VarDecl &D,
bool EmitDebugInfo);
void EmitStaticVarDecl(const VarDecl &D,
llvm::GlobalValue::LinkageTypes Linkage);
class ParamValue {
union {
Address Addr;
llvm::Value *Value;
};
bool IsIndirect;
ParamValue(llvm::Value *V) : Value(V), IsIndirect(false) {}
ParamValue(Address A) : Addr(A), IsIndirect(true) {}
public:
static ParamValue forDirect(llvm::Value *value) {
return ParamValue(value);
}
static ParamValue forIndirect(Address addr) {
assert(!addr.getAlignment().isZero());
return ParamValue(addr);
}
bool isIndirect() const { return IsIndirect; }
llvm::Value *getAnyValue() const {
if (!isIndirect())
return Value;
assert(!Addr.hasOffset() && "unexpected offset");
return Addr.getBasePointer();
}
llvm::Value *getDirectValue() const {
assert(!isIndirect());
return Value;
}
Address getIndirectAddress() const {
assert(isIndirect());
return Addr;
}
};
/// EmitParmDecl - Emit a ParmVarDecl or an ImplicitParamDecl.
void EmitParmDecl(const VarDecl &D, ParamValue Arg, unsigned ArgNo);
/// protectFromPeepholes - Protect a value that we're intending to
/// store to the side, but which will probably be used later, from
/// aggressive peepholing optimizations that might delete it.
///
/// Pass the result to unprotectFromPeepholes to declare that
/// protection is no longer required.
///
/// There's no particular reason why this shouldn't apply to
/// l-values, it's just that no existing peepholes work on pointers.
PeepholeProtection protectFromPeepholes(RValue rvalue);
void unprotectFromPeepholes(PeepholeProtection protection);
void emitAlignmentAssumptionCheck(llvm::Value *Ptr, QualType Ty,
SourceLocation Loc,
SourceLocation AssumptionLoc,
llvm::Value *Alignment,
llvm::Value *OffsetValue,
llvm::Value *TheCheck,
llvm::Instruction *Assumption);
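/// Emit an alignment assumption for \p PtrValue, as produced for example by
/// \code
///   void *p = __builtin_assume_aligned(q, 64);
/// \endcode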
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty,
SourceLocation Loc, SourceLocation AssumptionLoc,
llvm::Value *Alignment,
llvm::Value *OffsetValue = nullptr);
void emitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E,
SourceLocation AssumptionLoc,
llvm::Value *Alignment,
llvm::Value *OffsetValue = nullptr);
//===--------------------------------------------------------------------===//
// Statement Emission
//===--------------------------------------------------------------------===//
/// EmitStopPoint - Emit a debug stoppoint if we are emitting debug info.
void EmitStopPoint(const Stmt *S);
/// EmitStmt - Emit the code for the statement \arg S. It is legal to call
/// this function even if there is no current insertion point.
///
/// This function may clear the current insertion point; callers should use
/// EnsureInsertPoint if they wish to subsequently generate code without first
/// calling EmitBlock, EmitBranch, or EmitStmt.
void EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs = std::nullopt);
/// EmitSimpleStmt - Try to emit a "simple" statement which does not
/// necessarily require an insertion point or debug information; typically
/// because the statement amounts to a jump or a container of other
/// statements.
///
/// \return True if the statement was handled.
bool EmitSimpleStmt(const Stmt *S, ArrayRef<const Attr *> Attrs);
Address EmitCompoundStmt(const CompoundStmt &S, bool GetLast = false,
AggValueSlot AVS = AggValueSlot::ignored());
Address EmitCompoundStmtWithoutScope(const CompoundStmt &S,
bool GetLast = false,
AggValueSlot AVS =
AggValueSlot::ignored());
/// EmitLabel - Emit the block for the given label. It is legal to call this
/// function even if there is no current insertion point.
void EmitLabel(const LabelDecl *D); // helper for EmitLabelStmt.
void EmitLabelStmt(const LabelStmt &S);
void EmitAttributedStmt(const AttributedStmt &S);
void EmitGotoStmt(const GotoStmt &S);
void EmitIndirectGotoStmt(const IndirectGotoStmt &S);
void EmitIfStmt(const IfStmt &S);
void EmitWhileStmt(const WhileStmt &S,
ArrayRef<const Attr *> Attrs = std::nullopt);
void EmitDoStmt(const DoStmt &S, ArrayRef<const Attr *> Attrs = std::nullopt);
void EmitForStmt(const ForStmt &S,
ArrayRef<const Attr *> Attrs = std::nullopt);
void EmitReturnStmt(const ReturnStmt &S);
void EmitDeclStmt(const DeclStmt &S);
void EmitBreakStmt(const BreakStmt &S);
void EmitContinueStmt(const ContinueStmt &S);
void EmitSwitchStmt(const SwitchStmt &S);
void EmitDefaultStmt(const DefaultStmt &S, ArrayRef<const Attr *> Attrs);
void EmitCaseStmt(const CaseStmt &S, ArrayRef<const Attr *> Attrs);
void EmitCaseStmtRange(const CaseStmt &S, ArrayRef<const Attr *> Attrs);
void EmitAsmStmt(const AsmStmt &S);
void EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S);
void EmitObjCAtTryStmt(const ObjCAtTryStmt &S);
void EmitObjCAtThrowStmt(const ObjCAtThrowStmt &S);
void EmitObjCAtSynchronizedStmt(const ObjCAtSynchronizedStmt &S);
void EmitObjCAutoreleasePoolStmt(const ObjCAutoreleasePoolStmt &S);
void EmitCoroutineBody(const CoroutineBodyStmt &S);
void EmitCoreturnStmt(const CoreturnStmt &S);
RValue EmitCoawaitExpr(const CoawaitExpr &E,
AggValueSlot aggSlot = AggValueSlot::ignored(),
bool ignoreResult = false);
LValue EmitCoawaitLValue(const CoawaitExpr *E);
RValue EmitCoyieldExpr(const CoyieldExpr &E,
AggValueSlot aggSlot = AggValueSlot::ignored(),
bool ignoreResult = false);
LValue EmitCoyieldLValue(const CoyieldExpr *E);
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID);
void EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false);
void ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock = false);
void EmitCXXTryStmt(const CXXTryStmt &S);
void EmitSEHTryStmt(const SEHTryStmt &S);
void EmitSEHLeaveStmt(const SEHLeaveStmt &S);
void EnterSEHTryStmt(const SEHTryStmt &S);
void ExitSEHTryStmt(const SEHTryStmt &S);
void VolatilizeTryBlocks(llvm::BasicBlock *BB,
llvm::SmallPtrSet<llvm::BasicBlock *, 10> &V);
void pushSEHCleanup(CleanupKind kind,
llvm::Function *FinallyFunc);
void startOutlinedSEHHelper(CodeGenFunction &ParentCGF, bool IsFilter,
const Stmt *OutlinedStmt);
llvm::Function *GenerateSEHFilterFunction(CodeGenFunction &ParentCGF,
const SEHExceptStmt &Except);
llvm::Function *GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF,
const SEHFinallyStmt &Finally);
void EmitSEHExceptionCodeSave(CodeGenFunction &ParentCGF,
llvm::Value *ParentFP,
llvm::Value *EntryEBP);
llvm::Value *EmitSEHExceptionCode();
llvm::Value *EmitSEHExceptionInfo();
llvm::Value *EmitSEHAbnormalTermination();
/// Emit simple code for OpenMP directives in Simd-only mode.
void EmitSimpleOMPExecutableDirective(const OMPExecutableDirective &D);
/// Scan the outlined statement for captures from the parent function. For
/// each capture, mark the capture as escaped and emit a call to
/// llvm.localrecover. Insert the localrecover result into the LocalDeclMap.
void EmitCapturedLocals(CodeGenFunction &ParentCGF, const Stmt *OutlinedStmt,
bool IsFilter);
/// Recovers the address of a local in a parent function. ParentVar is the
/// address of the variable used in the immediate parent function. It can
/// either be an alloca or a call to llvm.localrecover if there are nested
/// outlined functions. ParentFP is the frame pointer of the outermost parent
/// frame.
Address recoverAddrOfEscapedLocal(CodeGenFunction &ParentCGF,
Address ParentVar,
llvm::Value *ParentFP);
void EmitCXXForRangeStmt(const CXXForRangeStmt &S,
ArrayRef<const Attr *> Attrs = std::nullopt);
/// Controls insertion of cancellation exit blocks in worksharing constructs.
class OMPCancelStackRAII {
CodeGenFunction &CGF;
public:
OMPCancelStackRAII(CodeGenFunction &CGF, OpenMPDirectiveKind Kind,
bool HasCancel)
: CGF(CGF) {
CGF.OMPCancelStack.enter(CGF, Kind, HasCancel);
}
~OMPCancelStackRAII() { CGF.OMPCancelStack.exit(CGF); }
};
/// Returns the calculated size of the specified type.
llvm::Value *getTypeSize(QualType Ty);
LValue InitCapturedStruct(const CapturedStmt &S);
llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
Address GenerateCapturedStmtArgument(const CapturedStmt &S);
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
SourceLocation Loc);
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
SmallVectorImpl<llvm::Value *> &CapturedVars);
void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
SourceLocation Loc);
/// Perform element-by-element copying of arrays with type \a
/// OriginalType from \a SrcAddr to \a DestAddr using copying procedure
/// generated by \a CopyGen.
///
/// \param DestAddr Address of the destination array.
/// \param SrcAddr Address of the source array.
/// \param OriginalType Type of destination and source arrays.
/// \param CopyGen Copying procedure that copies value of single array element
/// to another single array element.
void EmitOMPAggregateAssign(
Address DestAddr, Address SrcAddr, QualType OriginalType,
const llvm::function_ref<void(Address, Address)> CopyGen);
/// Emit proper copying of data from one variable to another.
///
/// \param OriginalType Original type of the copied variables.
/// \param DestAddr Destination address.
/// \param SrcAddr Source address.
/// \param DestVD Destination variable used in \a CopyExpr (for arrays, has
/// type of the base array element).
/// \param SrcVD Source variable used in \a CopyExpr (for arrays, has type of
/// the base array element).
/// \param Copy Actual copying expression for copying data from \a SrcVD to \a
/// DestVD.
void EmitOMPCopy(QualType OriginalType,
Address DestAddr, Address SrcAddr,
const VarDecl *DestVD, const VarDecl *SrcVD,
const Expr *Copy);
/// Emit atomic update code for constructs: \a X = \a X \a BO \a E or
/// \a X = \a E \a BO \a X.
///
/// \param X Value to be updated.
/// \param E Update value.
/// \param BO Binary operation for update operation.
/// \param IsXLHSInRHSPart true if \a X is LHS in RHS part of the update
/// expression, false otherwise.
/// \param AO Atomic ordering of the generated atomic instructions.
/// \param CommonGen Code generator for complex expressions that cannot be
/// expressed through an atomicrmw instruction.
/// \returns <true, OldAtomicValue> if a simple 'atomicrmw' instruction was
/// generated, <false, RValue::get(nullptr)> otherwise.
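/// A typical construct handled here, for illustration:
/// \code
///   #pragma omp atomic update
///   x = x + expr; // IsXLHSInRHSPart == true; "x = expr + x" gives false
/// \endcode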
std::pair<bool, RValue> EmitOMPAtomicSimpleUpdateExpr(
LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
llvm::AtomicOrdering AO, SourceLocation Loc,
const llvm::function_ref<RValue(RValue)> CommonGen);
bool EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
void EmitOMPPrivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
void EmitOMPUseDevicePtrClause(
const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
const llvm::DenseMap<const ValueDecl *, llvm::Value *>
CaptureDeviceAddrMap);
void EmitOMPUseDeviceAddrClause(
const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
const llvm::DenseMap<const ValueDecl *, llvm::Value *>
CaptureDeviceAddrMap);
/// Emit code for the 'copyin' clause in directive \a D. The following code
/// is generated at the start of outlined functions for directives:
/// \code
/// threadprivate_var1 = master_threadprivate_var1;
/// operator=(threadprivate_var2, master_threadprivate_var2);
/// ...
/// __kmpc_barrier(&loc, global_tid);
/// \endcode
///
/// \param D OpenMP directive possibly with 'copyin' clause(s).
/// \returns true if at least one copyin variable is found, false otherwise.
bool EmitOMPCopyinClause(const OMPExecutableDirective &D);
/// Emit initial code for lastprivate variables. If some variable is
/// not also firstprivate, then the default initialization is used. Otherwise
/// initialization of this variable is performed by the
/// EmitOMPFirstprivateClause method.
///
/// \param D Directive that may have 'lastprivate' clauses.
/// \param PrivateScope Private scope for capturing lastprivate variables for
/// proper codegen in internal captured statement.
///
/// \returns true if there is at least one lastprivate variable, false
/// otherwise.
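/// For example:
/// \code
///   #pragma omp for lastprivate(a)
///   for (int i = 0; i < n; ++i)
///     a = i; // the value from the last iteration is copied back to 'a'
/// \endcode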
bool EmitOMPLastprivateClauseInit(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
/// Emit final copying of lastprivate values to original variables at
/// the end of the worksharing or simd directive.
///
/// \param D Directive that has at least one 'lastprivate' clause.
/// \param IsLastIterCond Boolean condition that must be set to 'i1 true' if
/// it is the last iteration of the loop code in associated directive, or to
/// 'i1 false' otherwise. If this item is nullptr, no final check is required.
void EmitOMPLastprivateClauseFinal(const OMPExecutableDirective &D,
bool NoFinals,
llvm::Value *IsLastIterCond = nullptr);
/// Emit initial code for linear clauses.
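/// For example:
/// \code
///   #pragma omp simd linear(j : 2)
///   for (int i = 0; i < n; ++i)
///     j += 2; // 'j' advances by its linear step on each iteration
/// \endcode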
void EmitOMPLinearClause(const OMPLoopDirective &D,
CodeGenFunction::OMPPrivateScope &PrivateScope);
/// Emit final code for linear clauses.
/// \param CondGen Optional conditional code for final part of codegen for
/// linear clause.
void EmitOMPLinearClauseFinal(
const OMPLoopDirective &D,
const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen);
/// Emit initial code for reduction variables. Creates reduction copies
/// and initializes them with the values according to the OpenMP standard.
///
/// \param D Directive (possibly) with the 'reduction' clause.
/// \param PrivateScope Private scope for capturing reduction variables for
/// proper codegen in internal captured statement.
///
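/// For example:
/// \code
///   #pragma omp parallel for reduction(+ : sum)
///   for (int i = 0; i < n; ++i)
///     sum += v[i]; // each thread updates a private copy, combined at the end
/// \endcode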
void EmitOMPReductionClauseInit(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope,
bool ForInscan = false);
/// Emit final update of reduction values to original variables at
/// the end of the directive.
///
/// \param D Directive that has at least one 'reduction' clause.
/// \param ReductionKind The kind of reduction to perform.
void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D,
const OpenMPDirectiveKind ReductionKind);
/// Emit initial code for linear variables. Creates private copies
/// and initializes them with the values according to the OpenMP standard.
///
/// \param D Directive (possibly) with the 'linear' clause.
/// \return true if at least one linear variable is found that should be
/// initialized with the value of the original variable, false otherwise.
bool EmitOMPLinearClauseInit(const OMPLoopDirective &D);
typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/,
llvm::Function * /*OutlinedFn*/,
const OMPTaskDataTy & /*Data*/)>
TaskGenTy;
void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
const OpenMPDirectiveKind CapturedRegion,
const RegionCodeGenTy &BodyGen,
const TaskGenTy &TaskGen, OMPTaskDataTy &Data);
struct OMPTargetDataInfo {
Address BasePointersArray = Address::invalid();
Address PointersArray = Address::invalid();
Address SizesArray = Address::invalid();
Address MappersArray = Address::invalid();
unsigned NumberOfTargetItems = 0;
explicit OMPTargetDataInfo() = default;
OMPTargetDataInfo(Address BasePointersArray, Address PointersArray,
Address SizesArray, Address MappersArray,
unsigned NumberOfTargetItems)
: BasePointersArray(BasePointersArray), PointersArray(PointersArray),
SizesArray(SizesArray), MappersArray(MappersArray),
NumberOfTargetItems(NumberOfTargetItems) {}
};
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S,
const RegionCodeGenTy &BodyGen,
OMPTargetDataInfo &InputInfo);
void processInReduction(const OMPExecutableDirective &S,
OMPTaskDataTy &Data,
CodeGenFunction &CGF,
const CapturedStmt *CS,
OMPPrivateScope &Scope);
void EmitOMPMetaDirective(const OMPMetaDirective &S);
void EmitOMPParallelDirective(const OMPParallelDirective &S);
void EmitOMPSimdDirective(const OMPSimdDirective &S);
void EmitOMPTileDirective(const OMPTileDirective &S);
void EmitOMPUnrollDirective(const OMPUnrollDirective &S);
void EmitOMPReverseDirective(const OMPReverseDirective &S);
void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S);
void EmitOMPForDirective(const OMPForDirective &S);
void EmitOMPForSimdDirective(const OMPForSimdDirective &S);
void EmitOMPSectionsDirective(const OMPSectionsDirective &S);
void EmitOMPSectionDirective(const OMPSectionDirective &S);
void EmitOMPSingleDirective(const OMPSingleDirective &S);
void EmitOMPMasterDirective(const OMPMasterDirective &S);
void EmitOMPMaskedDirective(const OMPMaskedDirective &S);
void EmitOMPCriticalDirective(const OMPCriticalDirective &S);
void EmitOMPParallelForDirective(const OMPParallelForDirective &S);
void EmitOMPParallelForSimdDirective(const OMPParallelForSimdDirective &S);
void EmitOMPParallelSectionsDirective(const OMPParallelSectionsDirective &S);
void EmitOMPParallelMasterDirective(const OMPParallelMasterDirective &S);
void EmitOMPTaskDirective(const OMPTaskDirective &S);
void EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &S);
void EmitOMPErrorDirective(const OMPErrorDirective &S);
void EmitOMPBarrierDirective(const OMPBarrierDirective &S);
void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S);
void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S);
void EmitOMPFlushDirective(const OMPFlushDirective &S);
void EmitOMPDepobjDirective(const OMPDepobjDirective &S);
void EmitOMPScanDirective(const OMPScanDirective &S);
void EmitOMPOrderedDirective(const OMPOrderedDirective &S);
void EmitOMPAtomicDirective(const OMPAtomicDirective &S);
void EmitOMPTargetDirective(const OMPTargetDirective &S);
void EmitOMPTargetDataDirective(const OMPTargetDataDirective &S);
void EmitOMPTargetEnterDataDirective(const OMPTargetEnterDataDirective &S);
void EmitOMPTargetExitDataDirective(const OMPTargetExitDataDirective &S);
void EmitOMPTargetUpdateDirective(const OMPTargetUpdateDirective &S);
void EmitOMPTargetParallelDirective(const OMPTargetParallelDirective &S);
void
EmitOMPTargetParallelForDirective(const OMPTargetParallelForDirective &S);
void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
void
EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S);
void EmitOMPCancelDirective(const OMPCancelDirective &S);
void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S);
void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S);
void
EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
void EmitOMPParallelMasterTaskLoopDirective(
const OMPParallelMasterTaskLoopDirective &S);
void EmitOMPParallelMasterTaskLoopSimdDirective(
const OMPParallelMasterTaskLoopSimdDirective &S);
void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
void EmitOMPDistributeParallelForDirective(
const OMPDistributeParallelForDirective &S);
void EmitOMPDistributeParallelForSimdDirective(
const OMPDistributeParallelForSimdDirective &S);
void EmitOMPDistributeSimdDirective(const OMPDistributeSimdDirective &S);
void EmitOMPTargetParallelForSimdDirective(
const OMPTargetParallelForSimdDirective &S);
void EmitOMPTargetSimdDirective(const OMPTargetSimdDirective &S);
void EmitOMPTeamsDistributeDirective(const OMPTeamsDistributeDirective &S);
void
EmitOMPTeamsDistributeSimdDirective(const OMPTeamsDistributeSimdDirective &S);
void EmitOMPTeamsDistributeParallelForSimdDirective(
const OMPTeamsDistributeParallelForSimdDirective &S);
void EmitOMPTeamsDistributeParallelForDirective(
const OMPTeamsDistributeParallelForDirective &S);
void EmitOMPTargetTeamsDirective(const OMPTargetTeamsDirective &S);
void EmitOMPTargetTeamsDistributeDirective(
const OMPTargetTeamsDistributeDirective &S);
void EmitOMPTargetTeamsDistributeParallelForDirective(
const OMPTargetTeamsDistributeParallelForDirective &S);
void EmitOMPTargetTeamsDistributeParallelForSimdDirective(
const OMPTargetTeamsDistributeParallelForSimdDirective &S);
void EmitOMPTargetTeamsDistributeSimdDirective(
const OMPTargetTeamsDistributeSimdDirective &S);
void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S);
void EmitOMPParallelGenericLoopDirective(const OMPLoopDirective &S);
void EmitOMPTargetParallelGenericLoopDirective(
const OMPTargetParallelGenericLoopDirective &S);
void EmitOMPTargetTeamsGenericLoopDirective(
const OMPTargetTeamsGenericLoopDirective &S);
void EmitOMPTeamsGenericLoopDirective(const OMPTeamsGenericLoopDirective &S);
void EmitOMPInteropDirective(const OMPInteropDirective &S);
void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S);
/// Emit device code for the target directive.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
StringRef ParentName,
const OMPTargetDirective &S);
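/// Emit device code for the target parallel directive.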
static void
EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
const OMPTargetParallelDirective &S);
/// Emit device code for the target parallel for directive.
static void EmitOMPTargetParallelForDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetParallelForDirective &S);
/// Emit device code for the target parallel for simd directive.
static void EmitOMPTargetParallelForSimdDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetParallelForSimdDirective &S);
/// Emit device code for the target teams directive.
static void
EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
const OMPTargetTeamsDirective &S);
/// Emit device code for the target teams distribute directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetTeamsDistributeDirective &S);
/// Emit device code for the target teams distribute simd directive.
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetTeamsDistributeSimdDirective &S);
/// Emit device code for the target simd directive.
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM,
StringRef ParentName,
const OMPTargetSimdDirective &S);
/// Emit device code for the target teams distribute parallel for simd
/// directive.
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetTeamsDistributeParallelForSimdDirective &S);
/// Emit device code for the target teams loop directive.
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetTeamsGenericLoopDirective &S);
/// Emit device code for the target parallel loop directive.
static void EmitOMPTargetParallelGenericLoopDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetParallelGenericLoopDirective &S);
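/// Emit device code for the target teams distribute parallel for directive.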
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
CodeGenModule &CGM, StringRef ParentName,
const OMPTargetTeamsDistributeParallelForDirective &S);
/// Emit the Stmt \p S and return its topmost canonical loop, if any.
/// TODO: The \p Depth parameter is not yet implemented and must be 1. In the
/// future it is meant to be the number of loops expected in the loop nests
/// (usually specified by the "collapse" clause) that are collapsed to a
/// single loop by this function.
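/// For illustration, a nest that a "collapse" clause would reduce to a
/// single canonical loop:
/// \code
///   #pragma omp for collapse(2)
///   for (int i = 0; i < m; ++i)
///     for (int j = 0; j < n; ++j)
///       body(i, j);
/// \endcode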
llvm::CanonicalLoopInfo *EmitOMPCollapsedCanonicalLoopNest(const Stmt *S,
int Depth);
/// Emit an OMPCanonicalLoop using the OpenMPIRBuilder.
void EmitOMPCanonicalLoop(const OMPCanonicalLoop *S);
/// Emit inner loop of the worksharing/simd construct.
///
/// \param S Directive, for which the inner loop must be emitted.
/// \param RequiresCleanup true if the directive has some associated private
/// variables.
/// \param LoopCond Boolean condition for loop continuation.
/// \param IncExpr Increment expression for loop control variable.
/// \param BodyGen Generator for the inner body of the inner loop.
/// \param PostIncGen Generator for post-increment code (required for ordered
/// loop directives).
void EmitOMPInnerLoop(
const OMPExecutableDirective &S, bool RequiresCleanup,
const Expr *LoopCond, const Expr *IncExpr,
const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
const llvm::function_ref<void(CodeGenFunction &)> PostIncGen);
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind);
/// Emit initial code for loop counters of loop-based directives.
void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S,
OMPPrivateScope &LoopScope);
/// Helper for the OpenMP loop directives.
void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
/// Emit code for the worksharing loop-based directive.
/// \return true if this construct has any lastprivate clause, false
/// otherwise.
bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB,
const CodeGenLoopBoundsTy &CodeGenLoopBounds,
const CodeGenDispatchBoundsTy &CGDispatchBounds);
/// Emit code for the distribute loop-based directive.
void EmitOMPDistributeLoop(const OMPLoopDirective &S,
const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr);
/// Helpers for the OpenMP loop directives.
void EmitOMPSimdInit(const OMPLoopDirective &D);
void EmitOMPSimdFinal(
const OMPLoopDirective &D,
const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen);
/// Emits the lvalue for an expression with a possibly captured variable.
LValue EmitOMPSharedLValue(const Expr *E);
private:
/// Helpers for blocks.
llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
/// Struct with the values to be passed to the OpenMP loop-related functions.
struct OMPLoopArguments {
/// loop lower bound
Address LB = Address::invalid();
/// loop upper bound
Address UB = Address::invalid();
/// loop stride
Address ST = Address::invalid();
/// isLastIteration argument for runtime functions
Address IL = Address::invalid();
/// Chunk value generated by sema
llvm::Value *Chunk = nullptr;
/// EnsureUpperBound
Expr *EUB = nullptr;
/// IncrementExpression
Expr *IncExpr = nullptr;
/// Loop initialization
Expr *Init = nullptr;
/// Loop exit condition
Expr *Cond = nullptr;
/// Update of LB after a whole chunk has been executed
Expr *NextLB = nullptr;
/// Update of UB after a whole chunk has been executed
Expr *NextUB = nullptr;
/// Distinguishes between the 'for', 'distribute' and 'sections' directives.
OpenMPDirectiveKind DKind = llvm::omp::OMPD_unknown;
OMPLoopArguments() = default;
OMPLoopArguments(Address LB, Address UB, Address ST, Address IL,
llvm::Value *Chunk = nullptr, Expr *EUB = nullptr,
Expr *IncExpr = nullptr, Expr *Init = nullptr,
Expr *Cond = nullptr, Expr *NextLB = nullptr,
Expr *NextUB = nullptr)
: LB(LB), UB(UB), ST(ST), IL(IL), Chunk(Chunk), EUB(EUB),
IncExpr(IncExpr), Init(Init), Cond(Cond), NextLB(NextLB),
NextUB(NextUB) {}
};
void EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
const OMPLoopArguments &LoopArgs,
const CodeGenLoopTy &CodeGenLoop,
const CodeGenOrderedTy &CodeGenOrdered);
void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind,
bool IsMonotonic, const OMPLoopDirective &S,
OMPPrivateScope &LoopScope, bool Ordered,
const OMPLoopArguments &LoopArgs,
const CodeGenDispatchBoundsTy &CGDispatchBounds);
void EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind,
const OMPLoopDirective &S,
OMPPrivateScope &LoopScope,
const OMPLoopArguments &LoopArgs,
const CodeGenLoopTy &CodeGenLoopContent);
/// Emit code for sections directive.
void EmitSections(const OMPExecutableDirective &S);
public:
//===--------------------------------------------------------------------===//
// OpenACC Emission
//===--------------------------------------------------------------------===//
void EmitOpenACCComputeConstruct(const OpenACCComputeConstruct &S) {
// TODO OpenACC: Implement this. It is currently implemented as a 'no-op',
// simply emitting its structured block, but in the future we will implement
// some sort of IR.
EmitStmt(S.getStructuredBlock());
}
void EmitOpenACCLoopConstruct(const OpenACCLoopConstruct &S) {
// TODO OpenACC: Implement this. It is currently implemented as a 'no-op',
// simply emitting its loop, but in the future we will implement
// some sort of IR.
EmitStmt(S.getLoop());
}
//===--------------------------------------------------------------------===//
// LValue Expression Emission
//===--------------------------------------------------------------------===//
/// Create a check that a scalar RValue is non-null.
llvm::Value *EmitNonNullRValueCheck(RValue RV, QualType T);
/// GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
RValue GetUndefRValue(QualType Ty);
/// EmitUnsupportedRValue - Emit a dummy r-value using the type of E
/// and issue an ErrorUnsupported style diagnostic (using the
/// provided Name).
RValue EmitUnsupportedRValue(const Expr *E,
const char *Name);
/// EmitUnsupportedLValue - Emit a dummy l-value using the type of E and issue
/// an ErrorUnsupported style diagnostic (using the provided Name).
LValue EmitUnsupportedLValue(const Expr *E,
const char *Name);
/// EmitLValue - Emit code to compute a designator that specifies the location
/// of the expression.
///
/// This can return one of two things: a simple address or a bitfield
/// reference. In either case, the LLVM Value* in the LValue structure is
/// guaranteed to be an LLVM pointer type.
///
/// If this returns a bitfield reference, nothing about the pointee type of
/// the LLVM value is known: For example, it may not be a pointer to an
/// integer.
///
/// If this returns a normal address, and if the lvalue's C type is fixed
/// size, this method guarantees that the returned pointer type will point to
/// an LLVM type of the same size as the lvalue's type. If the lvalue has a
/// variable length type, this is not possible.
///
LValue EmitLValue(const Expr *E,
KnownNonNull_t IsKnownNonNull = NotKnownNonNull);
private:
LValue EmitLValueHelper(const Expr *E, KnownNonNull_t IsKnownNonNull);
public:
/// Same as EmitLValue but additionally we generate checking code to
/// guard against undefined behavior. This is only suitable when we know
/// that the address will be used to access the object.
LValue EmitCheckedLValue(const Expr *E, TypeCheckKind TCK);
RValue convertTempToRValue(Address addr, QualType type,
SourceLocation Loc);
void EmitAtomicInit(Expr *E, LValue lvalue);
bool LValueIsSuitableForInlineAtomic(LValue Src);
RValue EmitAtomicLoad(LValue LV, SourceLocation SL,
AggValueSlot Slot = AggValueSlot::ignored());
RValue EmitAtomicLoad(LValue lvalue, SourceLocation loc,
llvm::AtomicOrdering AO, bool IsVolatile = false,
AggValueSlot slot = AggValueSlot::ignored());
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit);
void EmitAtomicStore(RValue rvalue, LValue lvalue, llvm::AtomicOrdering AO,
bool IsVolatile, bool isInit);
std::pair<RValue, llvm::Value *> EmitAtomicCompareExchange(
LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc,
llvm::AtomicOrdering Success =
llvm::AtomicOrdering::SequentiallyConsistent,
llvm::AtomicOrdering Failure =
llvm::AtomicOrdering::SequentiallyConsistent,
bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored());
void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
bool IsVolatile);
/// EmitToMemory - Change a scalar value from its value
/// representation to its in-memory representation.
llvm::Value *EmitToMemory(llvm::Value *Value, QualType Ty);
/// EmitFromMemory - Change a scalar value from its memory
/// representation to its value representation.
llvm::Value *EmitFromMemory(llvm::Value *Value, QualType Ty);
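/// For example, on typical targets a C/C++ 'bool' is an 'i1' as an LLVM
/// value but is widened to an 'i8' in memory.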
/// Check if the scalar \p Value is within the valid range for the given
/// type \p Ty.
///
/// Returns true if a check is needed (even if the range is unknown).
bool EmitScalarRangeCheck(llvm::Value *Value, QualType Ty,
SourceLocation Loc);
/// EmitLoadOfScalar - Load a scalar value from an address, taking
/// care to appropriately convert from the memory representation to
/// the LLVM value representation.
llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty,
SourceLocation Loc,
AlignmentSource Source = AlignmentSource::Type,
bool isNontemporal = false) {
return EmitLoadOfScalar(Addr, Volatile, Ty, Loc, LValueBaseInfo(Source),
CGM.getTBAAAccessInfo(Ty), isNontemporal);
}
llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty,
SourceLocation Loc, LValueBaseInfo BaseInfo,
TBAAAccessInfo TBAAInfo,
bool isNontemporal = false);
/// EmitLoadOfScalar - Load a scalar value from an address, taking
/// care to appropriately convert from the memory representation to
/// the LLVM value representation. The l-value must be a simple
/// l-value.
llvm::Value *EmitLoadOfScalar(LValue lvalue, SourceLocation Loc);
/// EmitStoreOfScalar - Store a scalar value to an address, taking
/// care to appropriately convert from the memory representation to
/// the LLVM value representation.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr,
bool Volatile, QualType Ty,
AlignmentSource Source = AlignmentSource::Type,
bool isInit = false, bool isNontemporal = false) {
EmitStoreOfScalar(Value, Addr, Volatile, Ty, LValueBaseInfo(Source),
CGM.getTBAAAccessInfo(Ty), isInit, isNontemporal);
}
void EmitStoreOfScalar(llvm::Value *Value, Address Addr,
bool Volatile, QualType Ty,
LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo,
bool isInit = false, bool isNontemporal = false);
/// EmitStoreOfScalar - Store a scalar value to an address, taking
/// care to appropriately convert from the memory representation to
/// the LLVM value representation. The l-value must be a simple
/// l-value. The isInit flag indicates whether this is an initialization.
/// If so, atomic qualifiers are ignored and the store is always non-atomic.
void EmitStoreOfScalar(llvm::Value *value, LValue lvalue, bool isInit=false);
/// EmitLoadOfLValue - Given an expression that represents a value lvalue,
/// this method emits the address of the lvalue, then loads the result as an
/// rvalue, returning the rvalue.
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc);
RValue EmitLoadOfExtVectorElementLValue(LValue V);
RValue EmitLoadOfBitfieldLValue(LValue LV, SourceLocation Loc);
RValue EmitLoadOfGlobalRegLValue(LValue LV);
/// Like EmitLoadOfLValue but also handles complex and aggregate types.
RValue EmitLoadOfAnyValue(LValue V,
AggValueSlot Slot = AggValueSlot::ignored(),
SourceLocation Loc = {});
/// EmitStoreThroughLValue - Store the specified rvalue into the specified
/// lvalue, where both are guaranteed to have the same type, and that type
/// is 'Ty'.
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit = false);
void EmitStoreThroughExtVectorComponentLValue(RValue Src, LValue Dst);
void EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst);
/// EmitStoreThroughBitfieldLValue - Store Src into Dst with same constraints
/// as EmitStoreThroughLValue.
///
/// \param Result [out] - If non-null, this will be set to a Value* for the
/// bit-field contents after the store, appropriate for use as the result of
/// an assignment to the bit-field.
void EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
llvm::Value **Result=nullptr);
/// Emit an l-value for an assignment (simple or compound) of complex type.
LValue EmitComplexAssignmentLValue(const BinaryOperator *E);
LValue EmitComplexCompoundAssignmentLValue(const CompoundAssignOperator *E);
LValue EmitScalarCompoundAssignWithComplex(const CompoundAssignOperator *E,
llvm::Value *&Result);
// Note: only available for agg return types
LValue EmitBinaryOperatorLValue(const BinaryOperator *E);
LValue EmitCompoundAssignmentLValue(const CompoundAssignOperator *E);
// Note: only available for agg return types
LValue EmitCallExprLValue(const CallExpr *E);
// Note: only available for agg return types
LValue EmitVAArgExprLValue(const VAArgExpr *E);
LValue EmitDeclRefLValue(const DeclRefExpr *E);
LValue EmitStringLiteralLValue(const StringLiteral *E);
LValue EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E);
LValue EmitPredefinedLValue(const PredefinedExpr *E);
LValue EmitUnaryOpLValue(const UnaryOperator *E);
LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
bool Accessed = false);
LValue EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E);
LValue EmitArraySectionExpr(const ArraySectionExpr *E,
bool IsLowerBound = true);
LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E);
LValue EmitMemberExpr(const MemberExpr *E);
LValue EmitObjCIsaExpr(const ObjCIsaExpr *E);
LValue EmitCompoundLiteralLValue(const CompoundLiteralExpr *E);
LValue EmitInitListLValue(const InitListExpr *E);
void EmitIgnoredConditionalOperator(const AbstractConditionalOperator *E);
LValue EmitConditionalOperatorLValue(const AbstractConditionalOperator *E);
LValue EmitCastLValue(const CastExpr *E);
LValue EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
LValue EmitOpaqueValueLValue(const OpaqueValueExpr *e);
Address EmitExtVectorElementLValue(LValue V);
RValue EmitRValueForField(LValue LV, const FieldDecl *FD, SourceLocation Loc);
Address EmitArrayToPointerDecay(const Expr *Array,
LValueBaseInfo *BaseInfo = nullptr,
TBAAAccessInfo *TBAAInfo = nullptr);
class ConstantEmission {
llvm::PointerIntPair<llvm::Constant*, 1, bool> ValueAndIsReference;
ConstantEmission(llvm::Constant *C, bool isReference)
: ValueAndIsReference(C, isReference) {}
public:
ConstantEmission() {}
static ConstantEmission forReference(llvm::Constant *C) {
return ConstantEmission(C, true);
}
static ConstantEmission forValue(llvm::Constant *C) {
return ConstantEmission(C, false);
}
explicit operator bool() const {
return ValueAndIsReference.getOpaqueValue() != nullptr;
}
bool isReference() const { return ValueAndIsReference.getInt(); }
LValue getReferenceLValue(CodeGenFunction &CGF, Expr *refExpr) const {
assert(isReference());
return CGF.MakeNaturalAlignAddrLValue(ValueAndIsReference.getPointer(),
refExpr->getType());
}
llvm::Constant *getValue() const {
assert(!isReference());
return ValueAndIsReference.getPointer();
}
};
ConstantEmission tryEmitAsConstant(DeclRefExpr *refExpr);
ConstantEmission tryEmitAsConstant(const MemberExpr *ME);
llvm::Value *emitScalarConstant(const ConstantEmission &Constant, Expr *E);
RValue EmitPseudoObjectRValue(const PseudoObjectExpr *e,
AggValueSlot slot = AggValueSlot::ignored());
LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e);
llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface,
const ObjCIvarDecl *Ivar);
llvm::Value *EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface,
const ObjCIvarDecl *Ivar);
LValue EmitLValueForField(LValue Base, const FieldDecl* Field);
LValue EmitLValueForLambdaField(const FieldDecl *Field);
LValue EmitLValueForLambdaField(const FieldDecl *Field,
llvm::Value *ThisValue);
/// EmitLValueForFieldInitialization - Like EmitLValueForField, except that
/// if the Field is a reference, this will return the address of the reference
/// and not the address of the value stored in the reference.
LValue EmitLValueForFieldInitialization(LValue Base,
const FieldDecl* Field);
LValue EmitLValueForIvar(QualType ObjectTy,
llvm::Value* Base, const ObjCIvarDecl *Ivar,
unsigned CVRQualifiers);
LValue EmitCXXConstructLValue(const CXXConstructExpr *E);
LValue EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E);
LValue EmitCXXTypeidLValue(const CXXTypeidExpr *E);
LValue EmitCXXUuidofLValue(const CXXUuidofExpr *E);
LValue EmitObjCMessageExprLValue(const ObjCMessageExpr *E);
LValue EmitObjCIvarRefLValue(const ObjCIvarRefExpr *E);
LValue EmitStmtExprLValue(const StmtExpr *E);
LValue EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E);
LValue EmitObjCSelectorLValue(const ObjCSelectorExpr *E);
void EmitDeclRefExprDbgValue(const DeclRefExpr *E, const APValue &Init);
//===--------------------------------------------------------------------===//
// Scalar Expression Emission
//===--------------------------------------------------------------------===//
/// EmitCall - Generate a call of the given function, expecting the given
/// result type, and using the given argument list which specifies both the
/// LLVM arguments and the types they were derived from.
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee,
ReturnValueSlot ReturnValue, const CallArgList &Args,
llvm::CallBase **callOrInvoke, bool IsMustTail,
SourceLocation Loc,
bool IsVirtualFunctionPointerThunk = false);
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee,
ReturnValueSlot ReturnValue, const CallArgList &Args,
llvm::CallBase **callOrInvoke = nullptr,
bool IsMustTail = false) {
return EmitCall(CallInfo, Callee, ReturnValue, Args, callOrInvoke,
IsMustTail, SourceLocation());
}
RValue EmitCall(QualType FnType, const CGCallee &Callee, const CallExpr *E,
ReturnValueSlot ReturnValue, llvm::Value *Chain = nullptr);
RValue EmitCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue = ReturnValueSlot());
RValue EmitSimpleCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue);
CGCallee EmitCallee(const Expr *E);
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl);
void checkTargetFeatures(SourceLocation Loc, const FunctionDecl *TargetDecl);
llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee,
const Twine &name = "");
llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee,
ArrayRef<llvm::Value *> args,
const Twine &name = "");
llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee,
const Twine &name = "");
llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee,
ArrayRef<Address> args,
const Twine &name = "");
llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee,
ArrayRef<llvm::Value *> args,
const Twine &name = "");
SmallVector<llvm::OperandBundleDef, 1>
getBundlesForFunclet(llvm::Value *Callee);
llvm::CallBase *EmitCallOrInvoke(llvm::FunctionCallee Callee,
ArrayRef<llvm::Value *> Args,
const Twine &Name = "");
llvm::CallBase *EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee,
ArrayRef<llvm::Value *> args,
const Twine &name = "");
llvm::CallBase *EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee,
const Twine &name = "");
void EmitNoreturnRuntimeCallOrInvoke(llvm::FunctionCallee callee,
ArrayRef<llvm::Value *> args);
CGCallee BuildAppleKextVirtualCall(const CXXMethodDecl *MD,
NestedNameSpecifier *Qual,
llvm::Type *Ty);
CGCallee BuildAppleKextVirtualDestructorCall(const CXXDestructorDecl *DD,
CXXDtorType Type,
const CXXRecordDecl *RD);
bool isPointerKnownNonNull(const Expr *E);
/// Create the discriminator from the storage address and the entity hash.
llvm::Value *EmitPointerAuthBlendDiscriminator(llvm::Value *StorageAddress,
llvm::Value *Discriminator);
CGPointerAuthInfo EmitPointerAuthInfo(const PointerAuthSchema &Schema,
llvm::Value *StorageAddress,
GlobalDecl SchemaDecl,
QualType SchemaType);
llvm::Value *EmitPointerAuthSign(const CGPointerAuthInfo &Info,
llvm::Value *Pointer);
llvm::Value *EmitPointerAuthAuth(const CGPointerAuthInfo &Info,
llvm::Value *Pointer);
llvm::Value *emitPointerAuthResign(llvm::Value *Pointer, QualType PointerType,
const CGPointerAuthInfo &CurAuthInfo,
const CGPointerAuthInfo &NewAuthInfo,
bool IsKnownNonNull);
llvm::Value *emitPointerAuthResignCall(llvm::Value *Pointer,
const CGPointerAuthInfo &CurInfo,
const CGPointerAuthInfo &NewInfo);
void EmitPointerAuthOperandBundle(
const CGPointerAuthInfo &Info,
SmallVectorImpl<llvm::OperandBundleDef> &Bundles);
llvm::Value *authPointerToPointerCast(llvm::Value *ResultPtr,
QualType SourceType, QualType DestType);
Address authPointerToPointerCast(Address Ptr, QualType SourceType,
QualType DestType);
Address getAsNaturalAddressOf(Address Addr, QualType PointeeTy);
llvm::Value *getAsNaturalPointerTo(Address Addr, QualType PointeeType) {
return getAsNaturalAddressOf(Addr, PointeeType).getBasePointer();
}
// Return the copy constructor name with the prefix "__copy_constructor_"
// removed.
static std::string getNonTrivialCopyConstructorStr(QualType QT,
CharUnits Alignment,
bool IsVolatile,
ASTContext &Ctx);
// Return the destructor name with the prefix "__destructor_" removed.
static std::string getNonTrivialDestructorStr(QualType QT,
CharUnits Alignment,
bool IsVolatile,
ASTContext &Ctx);
// These functions emit calls to the special functions of non-trivial C
// structs.
void defaultInitNonTrivialCStructVar(LValue Dst);
void callCStructDefaultConstructor(LValue Dst);
void callCStructDestructor(LValue Dst);
void callCStructCopyConstructor(LValue Dst, LValue Src);
void callCStructMoveConstructor(LValue Dst, LValue Src);
void callCStructCopyAssignmentOperator(LValue Dst, LValue Src);
void callCStructMoveAssignmentOperator(LValue Dst, LValue Src);
RValue
EmitCXXMemberOrOperatorCall(const CXXMethodDecl *Method,
const CGCallee &Callee,
ReturnValueSlot ReturnValue, llvm::Value *This,
llvm::Value *ImplicitParam,
QualType ImplicitParamTy, const CallExpr *E,
CallArgList *RtlArgs);
RValue EmitCXXDestructorCall(GlobalDecl Dtor, const CGCallee &Callee,
llvm::Value *This, QualType ThisTy,
llvm::Value *ImplicitParam,
QualType ImplicitParamTy, const CallExpr *E);
RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E,
ReturnValueSlot ReturnValue);
RValue EmitCXXMemberOrOperatorMemberCallExpr(const CallExpr *CE,
const CXXMethodDecl *MD,
ReturnValueSlot ReturnValue,
bool HasQualifier,
NestedNameSpecifier *Qualifier,
bool IsArrow, const Expr *Base);
// Compute the object pointer.
Address EmitCXXMemberDataPointerAddress(const Expr *E, Address base,
llvm::Value *memberPtr,
const MemberPointerType *memberPtrType,
LValueBaseInfo *BaseInfo = nullptr,
TBAAAccessInfo *TBAAInfo = nullptr);
RValue EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
ReturnValueSlot ReturnValue);
RValue EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E,
const CXXMethodDecl *MD,
ReturnValueSlot ReturnValue);
RValue EmitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *E);
RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue);
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E);
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E);
RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E);
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E, ReturnValueSlot ReturnValue);
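/// Emit IR for the rotate builtins, e.g.
/// \code
///   unsigned r = __builtin_rotateleft32(x, 5);
/// \endcode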
RValue emitRotate(const CallExpr *E, bool IsRotateRight);
/// Emit IR for __builtin_os_log_format.
RValue emitBuiltinOSLogFormat(const CallExpr &E);
/// Emit IR for __builtin_is_aligned.
RValue EmitBuiltinIsAligned(const CallExpr *E);
/// Emit IR for __builtin_align_up/__builtin_align_down.
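/// For example:
/// \code
///   void *a = __builtin_align_up(p, 16);  // round p up to a 16-byte boundary
///   _Bool ok = __builtin_is_aligned(p, 8);
/// \endcode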
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp);
llvm::Function *generateBuiltinOSLogHelperFunction(
const analyze_os_log::OSLogBufferLayout &Layout,
CharUnits BufferAlignment);
RValue EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue);
/// EmitTargetBuiltinExpr - Emit the given builtin call. Returns \c nullptr
/// if the call is unhandled by the current target.
llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
const llvm::CmpInst::Predicate Fp,
const llvm::CmpInst::Predicate Ip,
const llvm::Twine &Name = "");
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch);
llvm::Value *EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch);
llvm::Value *EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch);
llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::IntegerType *ITy,
QualType RTy);
llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::ArrayType *ATy,
QualType RTy);
llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
unsigned LLVMIntrinsic,
unsigned AltLLVMIntrinsic,
const char *NameHint,
unsigned Modifier,
const CallExpr *E,
SmallVectorImpl<llvm::Value *> &Ops,
Address PtrOp0, Address PtrOp1,
llvm::Triple::ArchType Arch);
llvm::Function *LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
unsigned Modifier, llvm::Type *ArgTy,
const CallExpr *E);
llvm::Value *EmitNeonCall(llvm::Function *F,
SmallVectorImpl<llvm::Value*> &O,
const char *name,
unsigned shift = 0, bool rightshift = false);
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx,
const llvm::ElementCount &Count);
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);
llvm::Value *EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty,
bool negateForRightShift);
llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt,
llvm::Type *Ty, bool usgn, const char *name);
llvm::Value *vectorWrapScalar16(llvm::Value *Op);
/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
/// access builtin. Only required if it can't be inferred from the base
/// pointer operand.
llvm::Type *SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags);
SmallVector<llvm::Type *, 2>
getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType,
ArrayRef<llvm::Value *> Ops);
llvm::Type *getEltType(const SVETypeFlags &TypeFlags);
llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags);
llvm::ScalableVectorType *getSVEPredType(const SVETypeFlags &TypeFlags);
llvm::Value *EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
llvm::Type *ReturnType,
ArrayRef<llvm::Value *> Ops);
llvm::Value *EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
llvm::Type *ReturnType,
ArrayRef<llvm::Value *> Ops);
llvm::Value *EmitSVEAllTruePred(const SVETypeFlags &TypeFlags);
llvm::Value *EmitSVEDupX(llvm::Value *Scalar);
llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty);
llvm::Value *EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty);
llvm::Value *EmitSVEPMull(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
llvm::Value *EmitSVEMovl(const SVETypeFlags &TypeFlags,
llvm::ArrayRef<llvm::Value *> Ops,
unsigned BuiltinID);
llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred,
llvm::ScalableVectorType *VTy);
llvm::Value *EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID, bool IsZExtReturn);
llvm::Value *EmitSVEMaskedStore(const CallExpr *,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
llvm::Value *EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
/// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider
/// vector. It extracts each scalable vector from the struct and inserts it
/// into the wider vector. This avoids the error that arises when allocating
/// space in LLVM for a struct of scalable vectors when a function returns
/// such a struct.
llvm::Value *FormSVEBuiltinResult(llvm::Value *Call);
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSMEZero(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSMELdrStr(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E,
SmallVectorImpl<llvm::Value *> &Ops,
SVETypeFlags TypeFlags);
llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
llvm::Triple::ArchType Arch);
llvm::Value *EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops);
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
const CallExpr *E);
llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
const CallExpr *E);
llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
const CallExpr *E);
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
llvm::AtomicOrdering &AO,
llvm::SyncScope::ID &SSID);
enum class MSVCIntrin;
llvm::Value *EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E);
llvm::Value *EmitBuiltinAvailable(const VersionTuple &Version);
llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E);
llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E);
llvm::Value *EmitObjCBoxedExpr(const ObjCBoxedExpr *E);
llvm::Value *EmitObjCArrayLiteral(const ObjCArrayLiteral *E);
llvm::Value *EmitObjCDictionaryLiteral(const ObjCDictionaryLiteral *E);
llvm::Value *EmitObjCCollectionLiteral(const Expr *E,
const ObjCMethodDecl *MethodWithObjects);
llvm::Value *EmitObjCSelectorExpr(const ObjCSelectorExpr *E);
RValue EmitObjCMessageExpr(const ObjCMessageExpr *E,
ReturnValueSlot Return = ReturnValueSlot());
/// Retrieves the default cleanup kind for an ARC cleanup.
/// Except under -fobjc-arc-exceptions, ARC cleanups are normal-only.
CleanupKind getARCCleanupKind() {
return CGM.getCodeGenOpts().ObjCAutoRefCountExceptions
? NormalAndEHCleanup : NormalCleanup;
}
// ARC primitives.
void EmitARCInitWeak(Address addr, llvm::Value *value);
void EmitARCDestroyWeak(Address addr);
llvm::Value *EmitARCLoadWeak(Address addr);
llvm::Value *EmitARCLoadWeakRetained(Address addr);
llvm::Value *EmitARCStoreWeak(Address addr, llvm::Value *value, bool ignored);
void emitARCCopyAssignWeak(QualType Ty, Address DstAddr, Address SrcAddr);
void emitARCMoveAssignWeak(QualType Ty, Address DstAddr, Address SrcAddr);
void EmitARCCopyWeak(Address dst, Address src);
void EmitARCMoveWeak(Address dst, Address src);
llvm::Value *EmitARCRetainAutorelease(QualType type, llvm::Value *value);
llvm::Value *EmitARCRetainAutoreleaseNonBlock(llvm::Value *value);
llvm::Value *EmitARCStoreStrong(LValue lvalue, llvm::Value *value,
bool resultIgnored);
llvm::Value *EmitARCStoreStrongCall(Address addr, llvm::Value *value,
bool resultIgnored);
llvm::Value *EmitARCRetain(QualType type, llvm::Value *value);
llvm::Value *EmitARCRetainNonBlock(llvm::Value *value);
llvm::Value *EmitARCRetainBlock(llvm::Value *value, bool mandatory);
void EmitARCDestroyStrong(Address addr, ARCPreciseLifetime_t precise);
void EmitARCRelease(llvm::Value *value, ARCPreciseLifetime_t precise);
llvm::Value *EmitARCAutorelease(llvm::Value *value);
llvm::Value *EmitARCAutoreleaseReturnValue(llvm::Value *value);
llvm::Value *EmitARCRetainAutoreleaseReturnValue(llvm::Value *value);
llvm::Value *EmitARCRetainAutoreleasedReturnValue(llvm::Value *value);
llvm::Value *EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value);
llvm::Value *EmitObjCAutorelease(llvm::Value *value, llvm::Type *returnType);
llvm::Value *EmitObjCRetainNonBlock(llvm::Value *value,
llvm::Type *returnType);
void EmitObjCRelease(llvm::Value *value, ARCPreciseLifetime_t precise);
std::pair<LValue,llvm::Value*>
EmitARCStoreAutoreleasing(const BinaryOperator *e);
std::pair<LValue,llvm::Value*>
EmitARCStoreStrong(const BinaryOperator *e, bool ignored);
std::pair<LValue,llvm::Value*>
EmitARCStoreUnsafeUnretained(const BinaryOperator *e, bool ignored);
llvm::Value *EmitObjCAlloc(llvm::Value *value,
llvm::Type *returnType);
llvm::Value *EmitObjCAllocWithZone(llvm::Value *value,
llvm::Type *returnType);
llvm::Value *EmitObjCAllocInit(llvm::Value *value, llvm::Type *resultType);
llvm::Value *EmitObjCThrowOperand(const Expr *expr);
llvm::Value *EmitObjCConsumeObject(QualType T, llvm::Value *Ptr);
llvm::Value *EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr);
llvm::Value *EmitARCExtendBlockObject(const Expr *expr);
llvm::Value *EmitARCReclaimReturnedObject(const Expr *e,
bool allowUnsafeClaim);
llvm::Value *EmitARCRetainScalarExpr(const Expr *expr);
llvm::Value *EmitARCRetainAutoreleaseScalarExpr(const Expr *expr);
llvm::Value *EmitARCUnsafeUnretainedScalarExpr(const Expr *expr);
void EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values);
void EmitARCNoopIntrinsicUse(ArrayRef<llvm::Value *> values);
static Destroyer destroyARCStrongImprecise;
static Destroyer destroyARCStrongPrecise;
static Destroyer destroyARCWeak;
static Destroyer emitARCIntrinsicUse;
static Destroyer destroyNonTrivialCStruct;
void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr);
llvm::Value *EmitObjCAutoreleasePoolPush();
llvm::Value *EmitObjCMRRAutoreleasePoolPush();
void EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr);
void EmitObjCMRRAutoreleasePoolPop(llvm::Value *Ptr);
/// Emits a reference binding to the passed in expression.
RValue EmitReferenceBindingToExpr(const Expr *E);
//===--------------------------------------------------------------------===//
// Expression Emission
//===--------------------------------------------------------------------===//
// Expressions are broken into three classes: scalar, complex, aggregate.
/// EmitScalarExpr - Emit the computation of the specified expression of LLVM
/// scalar type, returning the result.
llvm::Value *EmitScalarExpr(const Expr *E, bool IgnoreResultAssign = false);
/// Emit a conversion from the specified type to the specified destination
/// type, both of which are LLVM scalar types.
llvm::Value *EmitScalarConversion(llvm::Value *Src, QualType SrcTy,
QualType DstTy, SourceLocation Loc);
/// Emit a conversion from the specified complex type to the specified
/// destination type, where the destination type is an LLVM scalar type.
llvm::Value *EmitComplexToScalarConversion(ComplexPairTy Src, QualType SrcTy,
QualType DstTy,
SourceLocation Loc);
/// EmitAggExpr - Emit the computation of the specified expression
/// of aggregate type. The result is computed into the given slot,
/// which may be null to indicate that the value is not needed.
void EmitAggExpr(const Expr *E, AggValueSlot AS);
/// EmitAggExprToLValue - Emit the computation of the specified expression of
/// aggregate type into a temporary LValue.
LValue EmitAggExprToLValue(const Expr *E);
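For orientation, a minimal sketch of what the three expression classes above look like in source form (illustrative only, not part of the diff; the GNU _Complex extension is used for the complex case):
struct Point { int x, y; };
void exprClasses() {
  double s = 0.5;           // scalar: a single LLVM value
  _Complex double c = 0.5;  // complex: lowered as a (real, imag) pair
  Point p = {1, 2};         // aggregate: computed into a memory slot (AggValueSlot)
  (void)s; (void)c; (void)p;
}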
enum ExprValueKind { EVK_RValue, EVK_NonRValue };
/// EmitAggFinalDestCopy - Emit copy of the specified aggregate into
/// destination address.
void EmitAggFinalDestCopy(QualType Type, AggValueSlot Dest, const LValue &Src,
ExprValueKind SrcKind);
- /// Build all the stores needed to initialize an aggregate at Dest with the
- /// value Val.
- void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile);
+ /// Create a store to \arg DstPtr from \arg Src, truncating the stored value
+ /// to at most \arg DstSize bytes.
+ void CreateCoercedStore(llvm::Value *Src, Address Dst, llvm::TypeSize DstSize,
+ bool DstIsVolatile);
/// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
/// make sure it survives garbage collection until this point.
void EmitExtendGCLifetime(llvm::Value *object);
/// EmitComplexExpr - Emit the computation of the specified expression of
/// complex type, returning the result.
ComplexPairTy EmitComplexExpr(const Expr *E,
bool IgnoreReal = false,
bool IgnoreImag = false);
/// EmitComplexExprIntoLValue - Emit the given expression of complex
/// type and place its result into the specified l-value.
void EmitComplexExprIntoLValue(const Expr *E, LValue dest, bool isInit);
/// EmitStoreOfComplex - Store a complex number into the specified l-value.
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit);
/// EmitLoadOfComplex - Load a complex number from the specified l-value.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc);
ComplexPairTy EmitPromotedComplexExpr(const Expr *E, QualType PromotionType);
llvm::Value *EmitPromotedScalarExpr(const Expr *E, QualType PromotionType);
ComplexPairTy EmitPromotedValue(ComplexPairTy result, QualType PromotionType);
ComplexPairTy EmitUnPromotedValue(ComplexPairTy result, QualType PromotionType);
Address emitAddrOfRealComponent(Address complex, QualType complexType);
Address emitAddrOfImagComponent(Address complex, QualType complexType);
/// AddInitializerToStaticVarDecl - Add the initializer for 'D' to the
/// global variable that has already been created for it. If the initializer
/// has a different type than GV does, this may free GV and return a different
/// one. Otherwise it just returns GV.
llvm::GlobalVariable *
AddInitializerToStaticVarDecl(const VarDecl &D,
llvm::GlobalVariable *GV);
// Emit an @llvm.invariant.start call for the given memory region.
void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size);
/// EmitCXXGlobalVarDeclInit - Create the initializer for a C++
/// variable with global storage.
void EmitCXXGlobalVarDeclInit(const VarDecl &D, llvm::GlobalVariable *GV,
bool PerformInit);
llvm::Constant *createAtExitStub(const VarDecl &VD, llvm::FunctionCallee Dtor,
llvm::Constant *Addr);
llvm::Function *createTLSAtExitStub(const VarDecl &VD,
llvm::FunctionCallee Dtor,
llvm::Constant *Addr,
llvm::FunctionCallee &AtExit);
/// Call atexit() with a function that passes the given argument to
/// the given function.
void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::FunctionCallee fn,
llvm::Constant *addr);
/// Registers the dtor using 'llvm.global_dtors' for platforms that do not
/// support an 'atexit()' function.
void registerGlobalDtorWithLLVM(const VarDecl &D, llvm::FunctionCallee fn,
llvm::Constant *addr);
/// Call atexit() with function dtorStub.
void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub);
/// Call unatexit() with function dtorStub.
llvm::Value *unregisterGlobalDtorWithUnAtExit(llvm::Constant *dtorStub);
/// Emit code in this function to perform a guarded variable
/// initialization. Guarded initializations are used when it's not
/// possible to prove that an initialization will be done exactly
/// once, e.g. with a static local variable or a static data member
/// of a class template.
void EmitCXXGuardedInit(const VarDecl &D, llvm::GlobalVariable *DeclPtr,
bool PerformInit);
enum class GuardKind { VariableGuard, TlsGuard };
/// Emit a branch to select whether or not to perform guarded initialization.
void EmitCXXGuardedInitBranch(llvm::Value *NeedsInit,
llvm::BasicBlock *InitBlock,
llvm::BasicBlock *NoInitBlock,
GuardKind Kind, const VarDecl *D);
/// GenerateCXXGlobalInitFunc - Generates code for initializing global
/// variables.
void
GenerateCXXGlobalInitFunc(llvm::Function *Fn,
ArrayRef<llvm::Function *> CXXThreadLocals,
ConstantAddress Guard = ConstantAddress::invalid());
/// GenerateCXXGlobalCleanUpFunc - Generates code for cleaning up global
/// variables.
void GenerateCXXGlobalCleanUpFunc(
llvm::Function *Fn,
ArrayRef<std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH,
llvm::Constant *>>
DtorsOrStermFinalizers);
void GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
const VarDecl *D,
llvm::GlobalVariable *Addr,
bool PerformInit);
void EmitCXXConstructExpr(const CXXConstructExpr *E, AggValueSlot Dest);
void EmitSynthesizedCXXCopyCtor(Address Dest, Address Src, const Expr *Exp);
void EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint = true);
RValue EmitAtomicExpr(AtomicExpr *E);
//===--------------------------------------------------------------------===//
// Annotations Emission
//===--------------------------------------------------------------------===//
/// Emit an annotation call (intrinsic).
llvm::Value *EmitAnnotationCall(llvm::Function *AnnotationFn,
llvm::Value *AnnotatedVal,
StringRef AnnotationStr,
SourceLocation Location,
const AnnotateAttr *Attr);
/// Emit local annotations for the local variable V, declared by D.
void EmitVarAnnotations(const VarDecl *D, llvm::Value *V);
/// Emit field annotations for the given field & value. Returns the
/// annotation result.
Address EmitFieldAnnotations(const FieldDecl *D, Address V);
//===--------------------------------------------------------------------===//
// Internal Helpers
//===--------------------------------------------------------------------===//
/// ContainsLabel - Return true if the statement contains a label in it. If
/// this statement is not executed normally and does not contain a label, then
/// we can just remove the code.
static bool ContainsLabel(const Stmt *S, bool IgnoreCaseStmts = false);
/// containsBreak - Return true if the statement contains a break out of it.
/// If the statement (recursively) contains a switch or loop with a break
/// inside of it, this is fine.
static bool containsBreak(const Stmt *S);
/// Determine if the given statement might introduce a declaration into the
/// current scope, by being a (possibly-labelled) DeclStmt.
static bool mightAddDeclToScope(const Stmt *S);
/// ConstantFoldsToSimpleInteger - If the specified expression does not fold
/// to a constant, or if it does but contains a label, return false. If it
/// constant folds, return true and set the boolean result in Result.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result,
bool AllowLabels = false);
/// ConstantFoldsToSimpleInteger - If the specified expression does not fold
/// to a constant, or if it does but contains a label, return false. If it
/// constant folds, return true and set the folded value.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &Result,
bool AllowLabels = false);
/// Ignore parentheses and logical-NOT to track conditions consistently.
static const Expr *stripCond(const Expr *C);
/// isInstrumentedCondition - Determine whether the given condition is an
/// instrumentable condition (i.e. no "&&" or "||").
static bool isInstrumentedCondition(const Expr *C);
/// EmitBranchToCounterBlock - Emit a conditional branch to a new block that
/// increments a profile counter based on the semantics of the given logical
/// operator opcode. This is used to instrument branch condition coverage
/// for logical operators.
void EmitBranchToCounterBlock(const Expr *Cond, BinaryOperator::Opcode LOp,
llvm::BasicBlock *TrueBlock,
llvm::BasicBlock *FalseBlock,
uint64_t TrueCount = 0,
Stmt::Likelihood LH = Stmt::LH_None,
const Expr *CntrIdx = nullptr);
/// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an
/// if statement) to the specified blocks. Based on the condition, this might
/// try to simplify the codegen of the conditional based on the branch.
/// TrueCount should be the number of times we expect the condition to
/// evaluate to true based on PGO data.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock,
llvm::BasicBlock *FalseBlock, uint64_t TrueCount,
Stmt::Likelihood LH = Stmt::LH_None,
const Expr *ConditionalOp = nullptr);
/// Given an assignment `*LHS = RHS`, emit a test that checks if \p RHS is
/// nonnull, if \p LHS is marked _Nonnull.
void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc);
/// An enumeration which makes it easier to specify whether or not an
/// operation is a subtraction.
enum { NotSubtraction = false, IsSubtraction = true };
/// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to
/// detect undefined behavior when the pointer overflow sanitizer is enabled.
/// \p SignedIndices indicates whether any of the GEP indices are signed.
/// \p IsSubtraction indicates whether the expression used to form the GEP
/// is a subtraction.
llvm::Value *EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr,
ArrayRef<llvm::Value *> IdxList,
bool SignedIndices,
bool IsSubtraction,
SourceLocation Loc,
const Twine &Name = "");
Address EmitCheckedInBoundsGEP(Address Addr, ArrayRef<llvm::Value *> IdxList,
llvm::Type *elementType, bool SignedIndices,
bool IsSubtraction, SourceLocation Loc,
CharUnits Align, const Twine &Name = "");
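Both overloads back the same user-visible behavior; a hedged sketch of source that takes the checked path when the pointer-overflow sanitizer is enabled (compile with -fsanitize=pointer-overflow; function names are hypothetical):
int *advance(int *base, long i) {
  return base + i; // emitted via the checked GEP path, IsSubtraction = false
}
int *rewind(int *end, long i) {
  return end - i;  // the GEP is formed from a subtraction, IsSubtraction = true
}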
/// Specifies which type of sanitizer check to apply when handling a
/// particular builtin.
enum BuiltinCheckKind {
BCK_CTZPassedZero,
BCK_CLZPassedZero,
};
/// Emits an argument for a call to a builtin. If the builtin sanitizer is
/// enabled, a runtime check specified by \p Kind is also emitted.
llvm::Value *EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind);
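For instance, the two BuiltinCheckKind values guard the undefined zero inputs of the count-zeros builtins; a small sketch (compile with -fsanitize=builtin; function names are hypothetical):
unsigned trailing(unsigned x) { return __builtin_ctz(x); } // x == 0 triggers BCK_CTZPassedZero
unsigned leading(unsigned x) { return __builtin_clz(x); }  // x == 0 triggers BCK_CLZPassedZero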
/// Emit a description of a type in a format suitable for passing to
/// a runtime sanitizer handler.
llvm::Constant *EmitCheckTypeDescriptor(QualType T);
/// Convert a value into a format suitable for passing to a runtime
/// sanitizer handler.
llvm::Value *EmitCheckValue(llvm::Value *V);
/// Emit a description of a source location in a format suitable for
/// passing to a runtime sanitizer handler.
llvm::Constant *EmitCheckSourceLocation(SourceLocation Loc);
void EmitKCFIOperandBundle(const CGCallee &Callee,
SmallVectorImpl<llvm::OperandBundleDef> &Bundles);
/// Create a basic block that will either trap or call a handler function in
/// the UBSan runtime with the provided arguments, and create a conditional
/// branch to it.
void EmitCheck(ArrayRef<std::pair<llvm::Value *, SanitizerMask>> Checked,
SanitizerHandler Check, ArrayRef<llvm::Constant *> StaticArgs,
ArrayRef<llvm::Value *> DynamicArgs);
/// Emit a slow path cross-DSO CFI check which calls __cfi_slowpath
/// if \p Cond is false.
void EmitCfiSlowPathCheck(SanitizerMask Kind, llvm::Value *Cond,
llvm::ConstantInt *TypeId, llvm::Value *Ptr,
ArrayRef<llvm::Constant *> StaticArgs);
/// Emit a reached-unreachable diagnostic if \p Loc is valid and runtime
/// checking is enabled. Otherwise, just emit an unreachable instruction.
void EmitUnreachable(SourceLocation Loc);
/// Create a basic block that will call the trap intrinsic, and emit a
/// conditional branch to it, for the -ftrapv checks.
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID);
/// Emit a call to trap or debugtrap and attach function attribute
/// "trap-func-name" if specified.
llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID);
/// Emit a stub for the cross-DSO CFI check function.
void EmitCfiCheckStub();
/// Emit a cross-DSO CFI failure handling function.
void EmitCfiCheckFail();
/// Create a check for a function parameter that may potentially be
/// declared as non-null.
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc,
AbstractCallee AC, unsigned ParmNum);
void EmitNonNullArgCheck(Address Addr, QualType ArgType,
SourceLocation ArgLoc, AbstractCallee AC,
unsigned ParmNum);
/// EmitCallArg - Emit a single call argument.
void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType);
/// EmitDelegateCallArg - We are performing a delegate call; that
/// is, the current function is delegating to another one. Produce
/// an r-value suitable for passing the given parameter.
void EmitDelegateCallArg(CallArgList &args, const VarDecl *param,
SourceLocation loc);
/// SetFPAccuracy - Set the minimum required accuracy of the given floating
/// point operation, expressed as the maximum relative error in ulp.
void SetFPAccuracy(llvm::Value *Val, float Accuracy);
/// Set the minimum required accuracy of the given sqrt operation
/// based on CodeGenOpts.
void SetSqrtFPAccuracy(llvm::Value *Val);
/// Set the minimum required accuracy of the given division operation based
/// on CodeGenOpts.
void SetDivFPAccuracy(llvm::Value *Val);
/// Set the codegen fast-math flags.
void SetFastMathFlags(FPOptions FPFeatures);
// Truncate or extend a boolean vector to the requested number of elements.
llvm::Value *emitBoolVecConversion(llvm::Value *SrcVec,
unsigned NumElementsDst,
const llvm::Twine &Name = "");
// Adds a convergence_ctrl token to |Input| and emits the required parent
// convergence instructions.
template <typename CallType>
CallType *addControlledConvergenceToken(CallType *Input) {
return cast<CallType>(
addConvergenceControlToken(Input, ConvergenceTokenStack.back()));
}
private:
// Emits a convergence_loop instruction for the given |BB|, with |ParentToken|
// as its parent convergence instruction.
llvm::IntrinsicInst *emitConvergenceLoopToken(llvm::BasicBlock *BB,
llvm::Value *ParentToken);
// Adds a convergence_ctrl token with |ParentToken| as the parent convergence
// instruction to the call |Input|.
llvm::CallBase *addConvergenceControlToken(llvm::CallBase *Input,
llvm::Value *ParentToken);
// Find the convergence_entry instruction for |F|, or emit one if none exists.
// Returns the convergence instruction.
llvm::IntrinsicInst *getOrEmitConvergenceEntryToken(llvm::Function *F);
// Find the convergence_loop instruction for the loop defined by |LI|, or
// emit one if none exists. Returns the convergence instruction.
llvm::IntrinsicInst *getOrEmitConvergenceLoopToken(const LoopInfo *LI);
private:
llvm::MDNode *getRangeForLoadFromType(QualType Ty);
void EmitReturnOfRValue(RValue RV, QualType Ty);
void deferPlaceholderReplacement(llvm::Instruction *Old, llvm::Value *New);
llvm::SmallVector<std::pair<llvm::WeakTrackingVH, llvm::Value *>, 4>
DeferredReplacements;
/// Set the address of a local variable.
void setAddrOfLocalVar(const VarDecl *VD, Address Addr) {
assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!");
LocalDeclMap.insert({VD, Addr});
}
/// ExpandTypeFromArgs - Reconstruct a structure of type \arg Ty
/// from function arguments into \arg Dst. See ABIArgInfo::Expand.
///
/// \param AI - The first function argument of the expansion.
void ExpandTypeFromArgs(QualType Ty, LValue Dst,
llvm::Function::arg_iterator &AI);
/// ExpandTypeToArgs - Expand a CallArg \arg Arg, with the LLVM type for \arg
/// Ty, into individual arguments on the provided vector \arg IRCallArgs,
/// starting at index \arg IRCallArgPos. See ABIArgInfo::Expand.
void ExpandTypeToArgs(QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy,
SmallVectorImpl<llvm::Value *> &IRCallArgs,
unsigned &IRCallArgPos);
std::pair<llvm::Value *, llvm::Type *>
EmitAsmInput(const TargetInfo::ConstraintInfo &Info, const Expr *InputExpr,
std::string &ConstraintStr);
std::pair<llvm::Value *, llvm::Type *>
EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info, LValue InputValue,
QualType InputType, std::string &ConstraintStr,
SourceLocation Loc);
/// Attempts to statically evaluate the object size of E. If that
/// fails, emits code to figure the size of E out for us. This is
/// pass_object_size aware.
///
/// If EmittedExpr is non-null, this will use that instead of re-emitting E.
llvm::Value *evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
llvm::IntegerType *ResType,
llvm::Value *EmittedE,
bool IsDynamic);
/// Emits the size of E, as required by __builtin_object_size. This
/// function is aware of pass_object_size parameters, and will act accordingly
/// if E is a parameter with the pass_object_size attribute.
llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type,
llvm::IntegerType *ResType,
llvm::Value *EmittedE,
bool IsDynamic);
llvm::Value *emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
llvm::IntegerType *ResType);
void emitZeroOrPatternForAutoVarInit(QualType type, const VarDecl &D,
Address Loc);
public:
enum class EvaluationOrder {
///! No language constraints on evaluation order.
Default,
///! Language semantics require left-to-right evaluation.
ForceLeftToRight,
///! Language semantics require right-to-left evaluation.
ForceRightToLeft
};
// Wrapper for function prototype sources. Wraps either a FunctionProtoType or
// an ObjCMethodDecl.
struct PrototypeWrapper {
llvm::PointerUnion<const FunctionProtoType *, const ObjCMethodDecl *> P;
PrototypeWrapper(const FunctionProtoType *FT) : P(FT) {}
PrototypeWrapper(const ObjCMethodDecl *MD) : P(MD) {}
};
void EmitCallArgs(CallArgList &Args, PrototypeWrapper Prototype,
llvm::iterator_range<CallExpr::const_arg_iterator> ArgRange,
AbstractCallee AC = AbstractCallee(),
unsigned ParamsToSkip = 0,
EvaluationOrder Order = EvaluationOrder::Default);
/// EmitPointerWithAlignment - Given an expression with a pointer type,
/// emit the value and compute our best estimate of the alignment of the
/// pointee.
///
/// \param BaseInfo - If non-null, this will be initialized with
/// information about the source of the alignment and the may-alias
/// attribute. Note that this function will conservatively fall back on
/// the type when it doesn't recognize the expression and may-alias will
/// be set to false.
///
/// One reasonable way to use this information is when there's a language
/// guarantee that the pointer must be aligned to some stricter value, and
/// we're simply trying to ensure that sufficiently obvious uses of under-
/// aligned objects don't get miscompiled; for example, a placement new
/// into the address of a local variable. In such a case, it's quite
/// reasonable to just ignore the returned alignment when it isn't from an
/// explicit source.
Address
EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo = nullptr,
TBAAAccessInfo *TBAAInfo = nullptr,
KnownNonNull_t IsKnownNonNull = NotKnownNonNull);
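The placement-new case called out in the comment, as a concrete sketch (illustrative; the buffer is only char-aligned as far as the type system knows):
#include <new>
void placementNew() {
  char buf[sizeof(int)];     // declared type promises only char alignment
  int *p = new (buf) int(0); // the "sufficiently obvious" under-aligned use
  *p = 1;                    // alignment is estimated from the expression, not just the type
}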
/// If \p E references a parameter with pass_object_size info or a constant
/// array size modifier, emit the object size divided by the size of \p EltTy.
/// Otherwise return null.
llvm::Value *LoadPassedObjectSize(const Expr *E, QualType EltTy);
void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK);
struct MultiVersionResolverOption {
llvm::Function *Function;
struct Conds {
StringRef Architecture;
llvm::SmallVector<StringRef, 8> Features;
Conds(StringRef Arch, ArrayRef<StringRef> Feats)
: Architecture(Arch), Features(Feats.begin(), Feats.end()) {}
} Conditions;
MultiVersionResolverOption(llvm::Function *F, StringRef Arch,
ArrayRef<StringRef> Feats)
: Function(F), Conditions(Arch, Feats) {}
};
// Emits the body of a multiversion function's resolver. Assumes that the
// options are already sorted in the proper order, with the 'default' option
// last (if it exists).
void EmitMultiVersionResolver(llvm::Function *Resolver,
ArrayRef<MultiVersionResolverOption> Options);
void
EmitX86MultiVersionResolver(llvm::Function *Resolver,
ArrayRef<MultiVersionResolverOption> Options);
void
EmitAArch64MultiVersionResolver(llvm::Function *Resolver,
ArrayRef<MultiVersionResolverOption> Options);
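What these resolvers implement at the source level is function multiversioning; a minimal sketch of the input pattern (the emitted resolver dispatches to the best variant, with the 'default' option sorted last as the comment above requires):
__attribute__((target("avx2"))) int blend() { return 2; }    // specialized variant
__attribute__((target("default"))) int blend() { return 1; } // fallback, the 'default' option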
private:
QualType getVarArgType(const Expr *Arg);
void EmitDeclMetadata();
BlockByrefHelpers *buildByrefHelpers(llvm::StructType &byrefType,
const AutoVarEmission &emission);
void AddObjCARCExceptionMetadata(llvm::Instruction *Inst);
llvm::Value *GetValueForARMHint(unsigned BuiltinID);
llvm::Value *EmitX86CpuIs(const CallExpr *E);
llvm::Value *EmitX86CpuIs(StringRef CPUStr);
llvm::Value *EmitX86CpuSupports(const CallExpr *E);
llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs);
llvm::Value *EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask);
llvm::Value *EmitX86CpuInit();
llvm::Value *FormX86ResolverCondition(const MultiVersionResolverOption &RO);
llvm::Value *EmitAArch64CpuInit();
llvm::Value *
FormAArch64ResolverCondition(const MultiVersionResolverOption &RO);
llvm::Value *EmitAArch64CpuSupports(const CallExpr *E);
llvm::Value *EmitAArch64CpuSupports(ArrayRef<StringRef> FeatureStrs);
};
inline DominatingLLVMValue::saved_type
DominatingLLVMValue::save(CodeGenFunction &CGF, llvm::Value *value) {
if (!needsSaving(value)) return saved_type(value, false);
// Otherwise, we need an alloca.
auto align = CharUnits::fromQuantity(
CGF.CGM.getDataLayout().getPrefTypeAlign(value->getType()));
Address alloca =
CGF.CreateTempAlloca(value->getType(), align, "cond-cleanup.save");
CGF.Builder.CreateStore(value, alloca);
return saved_type(alloca.emitRawPointer(CGF), true);
}
inline llvm::Value *DominatingLLVMValue::restore(CodeGenFunction &CGF,
saved_type value) {
// If the value says it wasn't saved, trust that it's still dominating.
if (!value.getInt()) return value.getPointer();
// Otherwise, it should be an alloca instruction, as set up in save().
auto alloca = cast<llvm::AllocaInst>(value.getPointer());
return CGF.Builder.CreateAlignedLoad(alloca->getAllocatedType(), alloca,
alloca->getAlign());
}
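A rough sketch of the save/restore contract implemented above, with the LLVM types replaced by placeholders (all names hypothetical): a value that may not dominate a later cleanup is spilled to a slot, and the flag recorded in saved_type says whether restore() must reload it.
#include <utility>
using Slot = void *;                   // stands in for the alloca
using Saved = std::pair<void *, bool>; // (pointer, wasSpilled), like saved_type
Saved save(void *value, bool needsSaving, Slot &slot) {
  if (!needsSaving)
    return {value, false}; // still dominates every use: keep as-is
  slot = value;            // the CreateStore into the temp alloca
  return {&slot, true};    // remember where it was spilled
}
void *restore(const Saved &s) {
  return s.second ? *static_cast<Slot *>(s.first) // reload from the slot
                  : s.first;                      // trusted to still dominate
}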
} // end namespace CodeGen
// Map the LangOption for floating point exception behavior into
// the corresponding enum in the IR.
llvm::fp::ExceptionBehavior
ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind);
} // end namespace clang
#endif
diff --git a/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp
index 60e65aaa83e9..7813d86ff0ea 100644
--- a/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1,5045 +1,5047 @@
//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//
#include "UnwrappedLineParser.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "FormatTokenSource.h"
#include "Macros.h"
#include "TokenAnnotator.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <utility>
#define DEBUG_TYPE "format-parser"
namespace clang {
namespace format {
namespace {
void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
StringRef Prefix = "", bool PrintText = false) {
OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
<< ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
bool NewLine = false;
for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
E = Line.Tokens.end();
I != E; ++I) {
if (NewLine) {
OS << Prefix;
NewLine = false;
}
OS << I->Tok->Tok.getName() << "["
<< "T=" << (unsigned)I->Tok->getType()
<< ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
<< "\"] ";
for (SmallVectorImpl<UnwrappedLine>::const_iterator
CI = I->Children.begin(),
CE = I->Children.end();
CI != CE; ++CI) {
OS << "\n";
printLine(OS, *CI, (Prefix + " ").str());
NewLine = true;
}
}
if (!NewLine)
OS << "\n";
}
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
printLine(llvm::dbgs(), Line);
}
class ScopedDeclarationState {
public:
ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
bool MustBeDeclaration)
: Line(Line), Stack(Stack) {
Line.MustBeDeclaration = MustBeDeclaration;
Stack.push_back(MustBeDeclaration);
}
~ScopedDeclarationState() {
Stack.pop_back();
if (!Stack.empty())
Line.MustBeDeclaration = Stack.back();
else
Line.MustBeDeclaration = true;
}
private:
UnwrappedLine &Line;
llvm::BitVector &Stack;
};
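The parser relies on several small RAII guards like this one; a self-contained sketch of the push-on-construct, restore-on-destruct pattern it follows (simplified; names hypothetical):
#include <vector>
struct ScopedFlag { // simplified analog of ScopedDeclarationState
  ScopedFlag(bool &Current, std::vector<bool> &Stack, bool Value)
      : Current(Current), Stack(Stack) {
    Current = Value;
    Stack.push_back(Value);
  }
  ~ScopedFlag() {
    Stack.pop_back();
    Current = Stack.empty() ? true : Stack.back(); // same default as above
  }
  bool &Current;
  std::vector<bool> &Stack;
};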
} // end anonymous namespace
std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
llvm::raw_os_ostream OS(Stream);
printLine(OS, Line);
return Stream;
}
class ScopedLineState {
public:
ScopedLineState(UnwrappedLineParser &Parser,
bool SwitchToPreprocessorLines = false)
: Parser(Parser), OriginalLines(Parser.CurrentLines) {
if (SwitchToPreprocessorLines)
Parser.CurrentLines = &Parser.PreprocessorDirectives;
else if (!Parser.Line->Tokens.empty())
Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
PreBlockLine = std::move(Parser.Line);
Parser.Line = std::make_unique<UnwrappedLine>();
Parser.Line->Level = PreBlockLine->Level;
Parser.Line->PPLevel = PreBlockLine->PPLevel;
Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
}
~ScopedLineState() {
if (!Parser.Line->Tokens.empty())
Parser.addUnwrappedLine();
assert(Parser.Line->Tokens.empty());
Parser.Line = std::move(PreBlockLine);
if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
Parser.MustBreakBeforeNextToken = true;
Parser.CurrentLines = OriginalLines;
}
private:
UnwrappedLineParser &Parser;
std::unique_ptr<UnwrappedLine> PreBlockLine;
SmallVectorImpl<UnwrappedLine> *OriginalLines;
};
class CompoundStatementIndenter {
public:
CompoundStatementIndenter(UnwrappedLineParser *Parser,
const FormatStyle &Style, unsigned &LineLevel)
: CompoundStatementIndenter(Parser, LineLevel,
Style.BraceWrapping.AfterControlStatement,
Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
bool WrapBrace, bool IndentBrace)
: LineLevel(LineLevel), OldLineLevel(LineLevel) {
if (WrapBrace)
Parser->addUnwrappedLine();
if (IndentBrace)
++LineLevel;
}
~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
private:
unsigned &LineLevel;
unsigned OldLineLevel;
};
UnwrappedLineParser::UnwrappedLineParser(
SourceManager &SourceMgr, const FormatStyle &Style,
const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
IdentifierTable &IdentTable)
: Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
? IG_Rejected
: IG_Inited),
IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
assert(IsCpp == LangOpts.CXXOperatorNames);
}
void UnwrappedLineParser::reset() {
PPBranchLevel = -1;
IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
? IG_Rejected
: IG_Inited;
IncludeGuardToken = nullptr;
Line.reset(new UnwrappedLine);
CommentsBeforeNextToken.clear();
FormatTok = nullptr;
MustBreakBeforeNextToken = false;
IsDecltypeAutoFunction = false;
PreprocessorDirectives.clear();
CurrentLines = &Lines;
DeclarationScopeStack.clear();
NestedTooDeep.clear();
NestedLambdas.clear();
PPStack.clear();
Line->FirstStartColumn = FirstStartColumn;
if (!Unexpanded.empty())
for (FormatToken *Token : AllTokens)
Token->MacroCtx.reset();
CurrentExpandedLines.clear();
ExpandedLines.clear();
Unexpanded.clear();
InExpansion = false;
Reconstruct.reset();
}
void UnwrappedLineParser::parse() {
IndexedTokenSource TokenSource(AllTokens);
Line->FirstStartColumn = FirstStartColumn;
do {
LLVM_DEBUG(llvm::dbgs() << "----\n");
reset();
Tokens = &TokenSource;
TokenSource.reset();
readToken();
parseFile();
// If we found an include guard then all preprocessor directives (other than
// the guard) are over-indented by one.
if (IncludeGuard == IG_Found) {
for (auto &Line : Lines)
if (Line.InPPDirective && Line.Level > 0)
--Line.Level;
}
// Create line with eof token.
assert(eof());
pushToken(FormatTok);
addUnwrappedLine();
// In a first run, format everything with the lines containing macro calls
// replaced by the expansion.
if (!ExpandedLines.empty()) {
LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
for (const auto &Line : Lines) {
if (!Line.Tokens.empty()) {
auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
if (it != ExpandedLines.end()) {
for (const auto &Expanded : it->second) {
LLVM_DEBUG(printDebugInfo(Expanded));
Callback.consumeUnwrappedLine(Expanded);
}
continue;
}
}
LLVM_DEBUG(printDebugInfo(Line));
Callback.consumeUnwrappedLine(Line);
}
Callback.finishRun();
}
LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
for (const UnwrappedLine &Line : Lines) {
LLVM_DEBUG(printDebugInfo(Line));
Callback.consumeUnwrappedLine(Line);
}
Callback.finishRun();
Lines.clear();
while (!PPLevelBranchIndex.empty() &&
PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
}
if (!PPLevelBranchIndex.empty()) {
++PPLevelBranchIndex.back();
assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
}
} while (!PPLevelBranchIndex.empty());
}
void UnwrappedLineParser::parseFile() {
// The top-level context in a file always has declarations, except for pre-
// processor directives and JavaScript files.
bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
MustBeDeclaration);
if (Style.Language == FormatStyle::LK_TextProto)
parseBracedList();
else
parseLevel();
// Make sure to format the remaining tokens.
//
// LK_TextProto is special since its top-level is parsed as the body of a
// braced list, which does not necessarily have natural line separators such
// as a semicolon. Comments after the last entry that have been determined to
// not belong to that line, as in:
// key: value
// // endfile comment
// do not have a chance to be put on a line of their own until this point.
// Here we add this newline before end-of-file comments.
if (Style.Language == FormatStyle::LK_TextProto &&
!CommentsBeforeNextToken.empty()) {
addUnwrappedLine();
}
flushComments(true);
addUnwrappedLine();
}
void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
do {
switch (FormatTok->Tok.getKind()) {
case tok::l_brace:
return;
default:
if (FormatTok->is(Keywords.kw_where)) {
addUnwrappedLine();
nextToken();
parseCSharpGenericTypeConstraint();
break;
}
nextToken();
break;
}
} while (!eof());
}
void UnwrappedLineParser::parseCSharpAttribute() {
int UnpairedSquareBrackets = 1;
do {
switch (FormatTok->Tok.getKind()) {
case tok::r_square:
nextToken();
--UnpairedSquareBrackets;
if (UnpairedSquareBrackets == 0) {
addUnwrappedLine();
return;
}
break;
case tok::l_square:
++UnpairedSquareBrackets;
nextToken();
break;
default:
nextToken();
break;
}
} while (!eof());
}
bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
if (!Lines.empty() && Lines.back().InPPDirective)
return true;
const FormatToken *Previous = Tokens->getPreviousToken();
return Previous && Previous->is(tok::comment) &&
(Previous->IsMultiline || Previous->NewlinesBefore > 0);
}
/// \brief Parses a level, that is, a sequence of statements up to the matching
/// closing brace of \p OpeningBrace (or the end of the file).
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
IfStmtKind *IfKind,
FormatToken **IfLeftBrace) {
const bool InRequiresExpression =
OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
const bool IsPrecededByCommentOrPPDirective =
!Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
FormatToken *IfLBrace = nullptr;
bool HasDoWhile = false;
bool HasLabel = false;
unsigned StatementCount = 0;
bool SwitchLabelEncountered = false;
do {
if (FormatTok->isAttribute()) {
nextToken();
if (FormatTok->is(tok::l_paren))
parseParens();
continue;
}
tok::TokenKind Kind = FormatTok->Tok.getKind();
if (FormatTok->is(TT_MacroBlockBegin))
Kind = tok::l_brace;
else if (FormatTok->is(TT_MacroBlockEnd))
Kind = tok::r_brace;
auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
&HasLabel, &StatementCount] {
parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
HasDoWhile ? nullptr : &HasDoWhile,
HasLabel ? nullptr : &HasLabel);
++StatementCount;
assert(StatementCount > 0 && "StatementCount overflow!");
};
switch (Kind) {
case tok::comment:
nextToken();
addUnwrappedLine();
break;
case tok::l_brace:
if (InRequiresExpression) {
FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
} else if (FormatTok->Previous &&
FormatTok->Previous->ClosesRequiresClause) {
// We need the 'default' case here to correctly parse a function
// l_brace.
ParseDefault();
continue;
}
if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
if (tryToParseBracedList())
continue;
FormatTok->setFinalizedType(TT_BlockLBrace);
}
parseBlock();
++StatementCount;
assert(StatementCount > 0 && "StatementCount overflow!");
addUnwrappedLine();
break;
case tok::r_brace:
if (OpeningBrace) {
if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
!OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
return false;
}
if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
HasDoWhile || IsPrecededByCommentOrPPDirective ||
precededByCommentOrPPDirective()) {
return false;
}
const FormatToken *Next = Tokens->peekNextToken();
if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
return false;
if (IfLeftBrace)
*IfLeftBrace = IfLBrace;
return true;
}
nextToken();
addUnwrappedLine();
break;
case tok::kw_default: {
unsigned StoredPosition = Tokens->getPosition();
auto *Next = Tokens->getNextNonComment();
FormatTok = Tokens->setPosition(StoredPosition);
if (!Next->isOneOf(tok::colon, tok::arrow)) {
// default not followed by `:` or `->` is not a case label; treat it
// like an identifier.
parseStructuralElement();
break;
}
// Else, if it is 'default:', fall through to the case handling.
[[fallthrough]];
}
case tok::kw_case:
if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
(Style.isJavaScript() && Line->MustBeDeclaration)) {
// Proto: there are no switch/case statements
// Verilog: Case labels don't have this word. We handle case
// labels including default in TokenAnnotator.
// JavaScript: A 'case: string' style field declaration.
ParseDefault();
break;
}
if (!SwitchLabelEncountered &&
(Style.IndentCaseLabels ||
(OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
(Line->InPPDirective && Line->Level == 1))) {
++Line->Level;
}
SwitchLabelEncountered = true;
parseStructuralElement();
break;
case tok::l_square:
if (Style.isCSharp()) {
nextToken();
parseCSharpAttribute();
break;
}
if (handleCppAttributes())
break;
[[fallthrough]];
default:
ParseDefault();
break;
}
} while (!eof());
return false;
}
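The r_brace branch above is what drives Style.RemoveBracesLLVM: a block qualifies only when it holds exactly one statement and nothing else (a label, a do-while, a preceding comment or PP directive) blocks the rewrite. Illustrative inputs, assuming that option is on:
int example(int a, int b) {
  if (a) { return 1; } // single statement, so the braces are removable
  if (b) {
  done:                // HasLabel: the braces must stay
    return 2;
  }
  return 0;
}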
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
// We'll parse forward through the tokens until we hit
// a closing brace or eof - note that getNextToken() will
// parse macros, so this will magically work inside macro
// definitions, too.
unsigned StoredPosition = Tokens->getPosition();
FormatToken *Tok = FormatTok;
const FormatToken *PrevTok = Tok->Previous;
// Keep a stack of positions of lbrace tokens. We will
// update information about whether an lbrace starts a
// braced init list or a different block during the loop.
struct StackEntry {
FormatToken *Tok;
const FormatToken *PrevTok;
};
SmallVector<StackEntry, 8> LBraceStack;
assert(Tok->is(tok::l_brace));
do {
auto *NextTok = Tokens->getNextNonComment();
if (!Line->InMacroBody && !Style.isTableGen()) {
// Skip PPDirective lines and comments.
while (NextTok->is(tok::hash)) {
NextTok = Tokens->getNextToken();
if (NextTok->is(tok::pp_not_keyword))
break;
do {
NextTok = Tokens->getNextToken();
} while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));
while (NextTok->is(tok::comment))
NextTok = Tokens->getNextToken();
}
}
switch (Tok->Tok.getKind()) {
case tok::l_brace:
if (Style.isJavaScript() && PrevTok) {
if (PrevTok->isOneOf(tok::colon, tok::less)) {
// A ':' indicates this code is in a type, or a braced list
// following a label in an object literal ({a: {b: 1}}).
// A '<' could be an object used in a comparison, but that is nonsense
// code (can never return true), so more likely it is a generic type
// argument (`X<{a: string; b: number}>`).
// The code below could be confused by semicolons between the
// individual members in a type member list, which would normally
// trigger BK_Block. In both cases, this must be parsed as an inline
// braced init.
Tok->setBlockKind(BK_BracedInit);
} else if (PrevTok->is(tok::r_paren)) {
// `) { }` can only occur in function or method declarations in JS.
Tok->setBlockKind(BK_Block);
}
} else {
Tok->setBlockKind(BK_Unknown);
}
LBraceStack.push_back({Tok, PrevTok});
break;
case tok::r_brace:
if (LBraceStack.empty())
break;
if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
bool ProbablyBracedList = false;
if (Style.Language == FormatStyle::LK_Proto) {
ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
} else if (LBrace->isNot(TT_EnumLBrace)) {
// Using OriginalColumn to distinguish between ObjC methods and
// binary operators is a bit hacky.
bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
NextTok->OriginalColumn == 0;
// Try to detect a braced list. Note that regardless how we mark inner
// braces here, we will overwrite the BlockKind later if we parse a
// braced list (where all blocks inside are by default braced lists),
// or when we explicitly detect blocks (for example while parsing
// lambdas).
// If we already marked the opening brace as braced list, the closing
// must also be part of it.
ProbablyBracedList = LBrace->is(TT_BracedListLBrace);
ProbablyBracedList = ProbablyBracedList ||
(Style.isJavaScript() &&
NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
Keywords.kw_as));
ProbablyBracedList =
- ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));
+ ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
+ NextTok->is(tok::l_paren)));
// If there is a comma, semicolon or right paren after the closing
// brace, we assume this is a braced initializer list.
// FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
// braced list in JS.
ProbablyBracedList =
ProbablyBracedList ||
NextTok->isOneOf(tok::comma, tok::period, tok::colon,
tok::r_paren, tok::r_square, tok::ellipsis);
// Distinguish between a braced list in a constructor initializer list
// followed by the constructor body, and just adjacent blocks.
ProbablyBracedList =
ProbablyBracedList ||
(NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
LBraceStack.back().PrevTok->isOneOf(tok::identifier,
tok::greater));
ProbablyBracedList =
ProbablyBracedList ||
(NextTok->is(tok::identifier) &&
!PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
ProbablyBracedList = ProbablyBracedList ||
(NextTok->is(tok::semi) &&
(!ExpectClassBody || LBraceStack.size() != 1));
ProbablyBracedList =
ProbablyBracedList ||
(NextTok->isBinaryOperator() && !NextIsObjCMethod);
if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
// We can have an array subscript after a braced init
// list, but C++11 attributes are expected after blocks.
NextTok = Tokens->getNextToken();
ProbablyBracedList = NextTok->isNot(tok::l_square);
}
- // Cpp macro definition body containing nonempty braced list or block:
+ // Cpp macro definition body that is a nonempty braced list or block:
if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
+ !FormatTok->Previous && NextTok->is(tok::eof) &&
// A statement can end with only `;` (simple statement), a block
// closing brace (compound statement), or `:` (label statement).
// If PrevTok is a block opening brace, Tok ends an empty block.
!PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
ProbablyBracedList = true;
}
}
const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
Tok->setBlockKind(BlockKind);
LBrace->setBlockKind(BlockKind);
}
LBraceStack.pop_back();
break;
case tok::identifier:
if (Tok->isNot(TT_StatementMacro))
break;
[[fallthrough]];
case tok::at:
case tok::semi:
case tok::kw_if:
case tok::kw_while:
case tok::kw_for:
case tok::kw_switch:
case tok::kw_try:
case tok::kw___try:
if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
LBraceStack.back().Tok->setBlockKind(BK_Block);
break;
default:
break;
}
PrevTok = Tok;
Tok = NextTok;
} while (Tok->isNot(tok::eof) && !LBraceStack.empty());
// Assume other blocks for all unclosed opening braces.
for (const auto &Entry : LBraceStack)
if (Entry.Tok->is(BK_Unknown))
Entry.Tok->setBlockKind(BK_Block);
FormatTok = Tokens->setPosition(StoredPosition);
}
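All of the above is a pre-pass that decides, per '{', between BK_BracedInit and BK_Block before any real parsing happens. A few C++ inputs that roughly exercise the look-ahead rules, including the literal-before-brace case the hunk above adds (a sketch, not an exhaustive account of the heuristic):
int arr[] = {1, 2};  // ',' inside and ';' after the brace: a braced init list
void h() {           // ') {': a block
  if (arr[0]) {}     // control-statement brace: a block
}
// Per the amended condition, in C++ a literal immediately before a closing
// brace (as in "{42}") now also counts toward the braced-list guess.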
// Sets the token type of the directly previous right brace.
void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
if (auto Prev = FormatTok->getPreviousNonComment();
Prev && Prev->is(tok::r_brace)) {
Prev->setFinalizedType(Type);
}
}
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
std::hash<T> hasher;
seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
size_t UnwrappedLineParser::computePPHash() const {
size_t h = 0;
for (const auto &i : PPStack) {
hash_combine(h, size_t(i.Kind));
hash_combine(h, i.Line);
}
return h;
}
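hash_combine is the familiar Boost-style mixer; a standalone sketch of how computePPHash folds the (Kind, Line) pairs of the PP stack into one value (stand-in types, hypothetical names):
#include <cstddef>
#include <functional>
#include <utility>
#include <vector>
template <class T> static void hashCombine(std::size_t &seed, const T &v) {
  seed ^= std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
std::size_t ppHash(const std::vector<std::pair<std::size_t, std::size_t>> &PP) {
  std::size_t h = 0;
  for (const auto &[kind, line] : PP) { // mirrors the PPStack loop above
    hashCombine(h, kind);
    hashCombine(h, line);
  }
  return h;
}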
// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
// is not null, subtracts its length (plus the preceding space) when computing
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
// running the token annotator on it so that we can restore them afterward.
bool UnwrappedLineParser::mightFitOnOneLine(
UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
const auto ColumnLimit = Style.ColumnLimit;
if (ColumnLimit == 0)
return true;
auto &Tokens = ParsedLine.Tokens;
assert(!Tokens.empty());
const auto *LastToken = Tokens.back().Tok;
assert(LastToken);
SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
int Index = 0;
for (const auto &Token : Tokens) {
assert(Token.Tok);
auto &SavedToken = SavedTokens[Index++];
SavedToken.Tok = new FormatToken;
SavedToken.Tok->copyFrom(*Token.Tok);
SavedToken.Children = std::move(Token.Children);
}
AnnotatedLine Line(ParsedLine);
assert(Line.Last == LastToken);
TokenAnnotator Annotator(Style, Keywords);
Annotator.annotate(Line);
Annotator.calculateFormattingInformation(Line);
auto Length = LastToken->TotalLength;
if (OpeningBrace) {
assert(OpeningBrace != Tokens.front().Tok);
if (auto Prev = OpeningBrace->Previous;
Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
Length -= ColumnLimit;
}
Length -= OpeningBrace->TokenText.size() + 1;
}
if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
Length -= FirstToken->TokenText.size() + 1;
}
Index = 0;
for (auto &Token : Tokens) {
const auto &SavedToken = SavedTokens[Index++];
Token.Tok->copyFrom(*SavedToken.Tok);
Token.Children = std::move(SavedToken.Children);
delete SavedToken.Tok;
}
// If these change, PPLevel needs to be used to get the correct indentation.
assert(!Line.InMacroBody);
assert(!Line.InPPDirective);
return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
}
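The final comparison is plain arithmetic: with IndentWidth 2, a Level-1 line whose last token reports TotalLength 70 fits an 80-column limit (1*2 + 70 <= 80) but not a 60-column one. The check in isolation (a sketch; names hypothetical):
bool fitsOnOneLine(unsigned Level, unsigned IndentWidth, unsigned Length,
                   unsigned ColumnLimit) {
  if (ColumnLimit == 0) // 0 means "no column limit" in clang-format
    return true;
  return Level * IndentWidth + Length <= ColumnLimit;
}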
FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
unsigned AddLevels, bool MunchSemi,
bool KeepBraces,
IfStmtKind *IfKind,
bool UnindentWhitesmithsBraces) {
auto HandleVerilogBlockLabel = [this]() {
// ":" name
if (Style.isVerilog() && FormatTok->is(tok::colon)) {
nextToken();
if (Keywords.isVerilogIdentifier(*FormatTok))
nextToken();
}
};
// Whether this is a Verilog-specific block that has a special header like a
// module.
const bool VerilogHierarchy =
Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
(Style.isVerilog() &&
(Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
"'{' or macro block token expected");
FormatToken *Tok = FormatTok;
const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
auto Index = CurrentLines->size();
const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
FormatTok->setBlockKind(BK_Block);
// For Whitesmiths mode, jump to the next level prior to skipping over the
// braces.
if (!VerilogHierarchy && AddLevels > 0 &&
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
++Line->Level;
}
size_t PPStartHash = computePPHash();
const unsigned InitialLevel = Line->Level;
if (VerilogHierarchy) {
AddLevels += parseVerilogHierarchyHeader();
} else {
nextToken(/*LevelDifference=*/AddLevels);
HandleVerilogBlockLabel();
}
// Bail out if there are too many levels. Otherwise, the stack might overflow.
if (Line->Level > 300)
return nullptr;
if (MacroBlock && FormatTok->is(tok::l_paren))
parseParens();
size_t NbPreprocessorDirectives =
!parsingPPDirective() ? PreprocessorDirectives.size() : 0;
addUnwrappedLine();
size_t OpeningLineIndex =
CurrentLines->empty()
? (UnwrappedLine::kInvalidIndex)
: (CurrentLines->size() - 1 - NbPreprocessorDirectives);
// Whitesmiths is weird here. The brace needs to be indented for the namespace
// block, but the block itself may not be indented depending on the style
// settings. This allows the format to back up one level in those cases.
if (UnindentWhitesmithsBraces)
--Line->Level;
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
MustBeDeclaration);
if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
Line->Level += AddLevels;
FormatToken *IfLBrace = nullptr;
const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
if (eof())
return IfLBrace;
if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
: FormatTok->isNot(tok::r_brace)) {
Line->Level = InitialLevel;
FormatTok->setBlockKind(BK_Block);
return IfLBrace;
}
if (FormatTok->is(tok::r_brace)) {
FormatTok->setBlockKind(BK_Block);
if (Tok->is(TT_NamespaceLBrace))
FormatTok->setFinalizedType(TT_NamespaceRBrace);
}
const bool IsFunctionRBrace =
FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
auto RemoveBraces = [=]() mutable {
if (!SimpleBlock)
return false;
assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
assert(FormatTok->is(tok::r_brace));
const bool WrappedOpeningBrace = !Tok->Previous;
if (WrappedOpeningBrace && FollowedByComment)
return false;
const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
if (KeepBraces && !HasRequiredIfBraces)
return false;
if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
const FormatToken *Previous = Tokens->getPreviousToken();
assert(Previous);
if (Previous->is(tok::r_brace) && !Previous->Optional)
return false;
}
assert(!CurrentLines->empty());
auto &LastLine = CurrentLines->back();
if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
return false;
if (Tok->is(TT_ElseLBrace))
return true;
if (WrappedOpeningBrace) {
assert(Index > 0);
--Index; // The line above the wrapped l_brace.
Tok = nullptr;
}
return mightFitOnOneLine((*CurrentLines)[Index], Tok);
};
if (RemoveBraces()) {
Tok->MatchingParen = FormatTok;
FormatTok->MatchingParen = Tok;
}
size_t PPEndHash = computePPHash();
// Munch the closing brace.
nextToken(/*LevelDifference=*/-AddLevels);
// When this is a function block and there is an unnecessary semicolon
// afterwards, mark it as optional (so the RemoveSemi pass can get rid of
// it later).
if (Style.RemoveSemicolon && IsFunctionRBrace) {
while (FormatTok->is(tok::semi)) {
FormatTok->Optional = true;
nextToken();
}
}
HandleVerilogBlockLabel();
if (MacroBlock && FormatTok->is(tok::l_paren))
parseParens();
Line->Level = InitialLevel;
if (FormatTok->is(tok::kw_noexcept)) {
// A noexcept in a requires expression.
nextToken();
}
if (FormatTok->is(tok::arrow)) {
// Following the } or noexcept we can find a trailing return type arrow
// as part of an implicit conversion constraint.
nextToken();
parseStructuralElement();
}
if (MunchSemi && FormatTok->is(tok::semi))
nextToken();
if (PPStartHash == PPEndHash) {
Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
// Update the opening line to add the forward reference as well
(*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
CurrentLines->size() - 1;
}
}
return IfLBrace;
}
static bool isGoogScope(const UnwrappedLine &Line) {
// FIXME: Closure-library specific stuff should not be hard-coded but be
// configurable.
if (Line.Tokens.size() < 4)
return false;
auto I = Line.Tokens.begin();
if (I->Tok->TokenText != "goog")
return false;
++I;
if (I->Tok->isNot(tok::period))
return false;
++I;
if (I->Tok->TokenText != "scope")
return false;
++I;
return I->Tok->is(tok::l_paren);
}
static bool isIIFE(const UnwrappedLine &Line,
const AdditionalKeywords &Keywords) {
// Look for the start of an immediately invoked anonymous function.
// https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
// This is commonly done in JavaScript to create a new, anonymous scope.
// Example: (function() { ... })()
if (Line.Tokens.size() < 3)
return false;
auto I = Line.Tokens.begin();
if (I->Tok->isNot(tok::l_paren))
return false;
++I;
if (I->Tok->isNot(Keywords.kw_function))
return false;
++I;
return I->Tok->is(tok::l_paren);
}
static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
const FormatToken &InitialToken) {
tok::TokenKind Kind = InitialToken.Tok.getKind();
if (InitialToken.is(TT_NamespaceMacro))
Kind = tok::kw_namespace;
switch (Kind) {
case tok::kw_namespace:
return Style.BraceWrapping.AfterNamespace;
case tok::kw_class:
return Style.BraceWrapping.AfterClass;
case tok::kw_union:
return Style.BraceWrapping.AfterUnion;
case tok::kw_struct:
return Style.BraceWrapping.AfterStruct;
case tok::kw_enum:
return Style.BraceWrapping.AfterEnum;
default:
return false;
}
}
void UnwrappedLineParser::parseChildBlock() {
assert(FormatTok->is(tok::l_brace));
FormatTok->setBlockKind(BK_Block);
const FormatToken *OpeningBrace = FormatTok;
nextToken();
{
bool SkipIndent = (Style.isJavaScript() &&
(isGoogScope(*Line) || isIIFE(*Line, Keywords)));
ScopedLineState LineState(*this);
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
/*MustBeDeclaration=*/false);
Line->Level += SkipIndent ? 0 : 1;
parseLevel(OpeningBrace);
flushComments(isOnNewLine(*FormatTok));
Line->Level -= SkipIndent ? 0 : 1;
}
nextToken();
}
void UnwrappedLineParser::parsePPDirective() {
assert(FormatTok->is(tok::hash) && "'#' expected");
ScopedMacroState MacroState(*Line, Tokens, FormatTok);
nextToken();
if (!FormatTok->Tok.getIdentifierInfo()) {
parsePPUnknown();
return;
}
switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
case tok::pp_define:
parsePPDefine();
return;
case tok::pp_if:
parsePPIf(/*IfDef=*/false);
break;
case tok::pp_ifdef:
case tok::pp_ifndef:
parsePPIf(/*IfDef=*/true);
break;
case tok::pp_else:
case tok::pp_elifdef:
case tok::pp_elifndef:
case tok::pp_elif:
parsePPElse();
break;
case tok::pp_endif:
parsePPEndIf();
break;
case tok::pp_pragma:
parsePPPragma();
break;
default:
parsePPUnknown();
break;
}
}
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
size_t Line = CurrentLines->size();
if (CurrentLines == &PreprocessorDirectives)
Line += Lines.size();
if (Unreachable ||
(!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
PPStack.push_back({PP_Unreachable, Line});
} else {
PPStack.push_back({PP_Conditional, Line});
}
}
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
++PPBranchLevel;
assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
PPLevelBranchIndex.push_back(0);
PPLevelBranchCount.push_back(0);
}
PPChainBranchIndex.push(Unreachable ? -1 : 0);
bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
conditionalCompilationCondition(Unreachable || Skip);
}
void UnwrappedLineParser::conditionalCompilationAlternative() {
if (!PPStack.empty())
PPStack.pop_back();
assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
if (!PPChainBranchIndex.empty())
++PPChainBranchIndex.top();
conditionalCompilationCondition(
PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
}
void UnwrappedLineParser::conditionalCompilationEnd() {
assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
}
// Guard against #endif's without #if.
if (PPBranchLevel > -1)
--PPBranchLevel;
if (!PPChainBranchIndex.empty())
PPChainBranchIndex.pop();
if (!PPStack.empty())
PPStack.pop_back();
}
void UnwrappedLineParser::parsePPIf(bool IfDef) {
bool IfNDef = FormatTok->is(tok::pp_ifndef);
nextToken();
bool Unreachable = false;
if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
Unreachable = true;
if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
Unreachable = true;
conditionalCompilationStart(Unreachable);
FormatToken *IfCondition = FormatTok;
// If there's a #ifndef on the first line, and the only lines before it are
// comments, it could be an include guard.
bool MaybeIncludeGuard = IfNDef;
if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
for (auto &Line : Lines) {
if (Line.Tokens.front().Tok->isNot(tok::comment)) {
MaybeIncludeGuard = false;
IncludeGuard = IG_Rejected;
break;
}
}
}
--PPBranchLevel;
parsePPUnknown();
++PPBranchLevel;
if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
IncludeGuard = IG_IfNdefed;
IncludeGuardToken = IfCondition;
}
}
void UnwrappedLineParser::parsePPElse() {
// If a potential include guard has an #else, it's not an include guard.
if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
IncludeGuard = IG_Rejected;
// Don't crash when there is an #else without an #if.
assert(PPBranchLevel >= -1);
if (PPBranchLevel == -1)
conditionalCompilationStart(/*Unreachable=*/true);
conditionalCompilationAlternative();
--PPBranchLevel;
parsePPUnknown();
++PPBranchLevel;
}
void UnwrappedLineParser::parsePPEndIf() {
conditionalCompilationEnd();
parsePPUnknown();
// If the #endif of a potential include guard is the last thing in the file,
// then we found an include guard.
if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
IncludeGuard = IG_Found;
}
}
void UnwrappedLineParser::parsePPDefine() {
nextToken();
if (!FormatTok->Tok.getIdentifierInfo()) {
IncludeGuard = IG_Rejected;
IncludeGuardToken = nullptr;
parsePPUnknown();
return;
}
if (IncludeGuard == IG_IfNdefed &&
IncludeGuardToken->TokenText == FormatTok->TokenText) {
IncludeGuard = IG_Defined;
IncludeGuardToken = nullptr;
for (auto &Line : Lines) {
if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
IncludeGuard = IG_Rejected;
break;
}
}
}
// In the context of a define, even keywords should be treated as normal
// identifiers. Setting the kind to identifier is not enough, because we need
// to treat additional keywords like __except as well, which are already
// identifiers. Setting the identifier info to null interferes with include
// guard processing above, and changes preprocessing nesting.
FormatTok->Tok.setKind(tok::identifier);
FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
nextToken();
if (FormatTok->Tok.getKind() == tok::l_paren &&
!FormatTok->hasWhitespaceBefore()) {
parseParens();
}
if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
Line->Level += PPBranchLevel + 1;
addUnwrappedLine();
++Line->Level;
Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
assert((int)Line->PPLevel >= 0);
Line->InMacroBody = true;
if (Style.SkipMacroDefinitionBody) {
while (!eof()) {
FormatTok->Finalized = true;
FormatTok = Tokens->getNextToken();
}
addUnwrappedLine();
return;
}
// Errors during a preprocessor directive can only affect the layout of the
// preprocessor directive, and thus we ignore them. An alternative approach
// would be to use the same approach we use on the file level (no
// re-indentation if there was a structural error) within the macro
// definition.
parseFile();
}
void UnwrappedLineParser::parsePPPragma() {
Line->InPragmaDirective = true;
parsePPUnknown();
}
void UnwrappedLineParser::parsePPUnknown() {
do {
nextToken();
} while (!eof());
if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
Line->Level += PPBranchLevel + 1;
addUnwrappedLine();
}
// Here we exclude certain tokens that are not usually the first token in an
// unwrapped line. This is used in an attempt to distinguish macro calls
// without trailing semicolons from other constructs split across several
// lines.
static bool tokenCanStartNewLine(const FormatToken &Tok) {
// Semicolon can be a null-statement, l_square can be a start of a macro or
// a C++11 attribute, but this doesn't seem to be common.
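// For example (illustrative), in
//   SOME_MACRO(arg)
//   int f();
// 'int' can start an unwrapped line, so the uppercase function-like call
// above it can be recognized as a macro call without a trailing semicolon.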
return !Tok.isOneOf(tok::semi, tok::l_brace,
// Tokens that can only be used as binary operators and a
// part of overloaded operator names.
tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
tok::less, tok::greater, tok::slash, tok::percent,
tok::lessless, tok::greatergreater, tok::equal,
tok::plusequal, tok::minusequal, tok::starequal,
tok::slashequal, tok::percentequal, tok::ampequal,
tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
tok::lesslessequal,
// Colon is used in labels, base class lists, initializer
// lists, range-based for loops, ternary operator, but
// should never be the first token in an unwrapped line.
tok::colon,
// 'noexcept' is a trailing annotation.
tok::kw_noexcept);
}
static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
const FormatToken *FormatTok) {
// FIXME: This returns true for C/C++ keywords like 'struct'.
return FormatTok->is(tok::identifier) &&
(!FormatTok->Tok.getIdentifierInfo() ||
!FormatTok->isOneOf(
Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
Keywords.kw_let, Keywords.kw_var, tok::kw_const,
Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
Keywords.kw_instanceof, Keywords.kw_interface,
Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
}
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
const FormatToken *FormatTok) {
return FormatTok->Tok.isLiteral() ||
FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
mustBeJSIdent(Keywords, FormatTok);
}
// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
// when encountered after a value (see mustBeJSIdentOrValue).
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
const FormatToken *FormatTok) {
return FormatTok->isOneOf(
tok::kw_return, Keywords.kw_yield,
// conditionals
tok::kw_if, tok::kw_else,
// loops
tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
// switch/case
tok::kw_switch, tok::kw_case,
// exceptions
tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
// declaration
tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
Keywords.kw_async, Keywords.kw_function,
// import/export
Keywords.kw_import, tok::kw_export);
}
// Checks whether a token is a type in K&R C (aka C78).
static bool isC78Type(const FormatToken &Tok) {
return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
tok::kw_unsigned, tok::kw_float, tok::kw_double,
tok::identifier);
}
// This function checks whether a token starts the first parameter declaration
// in a K&R C (aka C78) function definition, e.g.:
// int f(a, b)
// short a, b;
// {
// return a + b;
// }
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
const FormatToken *FuncName) {
assert(Tok);
assert(Next);
assert(FuncName);
if (FuncName->isNot(tok::identifier))
return false;
const FormatToken *Prev = FuncName->Previous;
if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
return false;
if (!isC78Type(*Tok) &&
!Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
return false;
}
if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
return false;
Tok = Tok->Previous;
if (!Tok || Tok->isNot(tok::r_paren))
return false;
Tok = Tok->Previous;
if (!Tok || Tok->isNot(tok::identifier))
return false;
return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
}
bool UnwrappedLineParser::parseModuleImport() {
assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
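// C++20 module imports can take forms like (illustrative):
//   import foo.bar;
//   import :partition;
//   import <vector>;
//   import "header.h";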
if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
!Token->Tok.getIdentifierInfo() &&
!Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
return false;
}
nextToken();
while (!eof()) {
if (FormatTok->is(tok::colon)) {
FormatTok->setFinalizedType(TT_ModulePartitionColon);
}
// Handle import <foo/bar.h> as we would an include statement.
else if (FormatTok->is(tok::less)) {
nextToken();
while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
// Mark tokens up to the trailing line comments as implicit string
// literals.
if (FormatTok->isNot(tok::comment) &&
!FormatTok->TokenText.starts_with("//")) {
FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
}
nextToken();
}
}
if (FormatTok->is(tok::semi)) {
nextToken();
break;
}
nextToken();
}
addUnwrappedLine();
return true;
}
// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative: it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well-known cases. It *must not*
// terminate a line in speculative cases.
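// Example (illustrative): ASI terminates the first line in
//   let a = 1
//   let b = 2
// because a value ('1') is followed by a token that starts a declaration
// ('let') on a new line.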
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
FormatToken *Previous = FormatTok;
readToken();
FormatToken *Next = FormatTok;
bool IsOnSameLine =
CommentsBeforeNextToken.empty()
? Next->NewlinesBefore == 0
: CommentsBeforeNextToken.front()->NewlinesBefore == 0;
if (IsOnSameLine)
return;
bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
bool PreviousStartsTemplateExpr =
Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
// If the line contains an '@' sign, the previous token might be an
// annotation, which can precede another identifier/value.
bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
return LineNode.Tok->is(tok::at);
});
if (HasAt)
return;
}
if (Next->is(tok::exclaim) && PreviousMustBeValue)
return addUnwrappedLine();
bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
bool NextEndsTemplateExpr =
Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
(PreviousMustBeValue ||
Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
tok::minusminus))) {
return addUnwrappedLine();
}
if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
isJSDeclOrStmt(Keywords, Next)) {
return addUnwrappedLine();
}
}
void UnwrappedLineParser::parseStructuralElement(
const FormatToken *OpeningBrace, IfStmtKind *IfKind,
FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
if (Style.Language == FormatStyle::LK_TableGen &&
FormatTok->is(tok::pp_include)) {
nextToken();
if (FormatTok->is(tok::string_literal))
nextToken();
addUnwrappedLine();
return;
}
if (IsCpp) {
while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
}
} else if (Style.isVerilog()) {
if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
parseForOrWhileLoop(/*HasParens=*/false);
return;
}
if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
parseForOrWhileLoop();
return;
}
if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
Keywords.kw_assume, Keywords.kw_cover)) {
parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
return;
}
// Skip things that can exist before keywords like 'if' and 'case'.
while (true) {
if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
Keywords.kw_unique0)) {
nextToken();
} else if (FormatTok->is(tok::l_paren) &&
Tokens->peekNextToken()->is(tok::star)) {
parseParens();
} else {
break;
}
}
}
// Tokens that only make sense at the beginning of a line.
if (FormatTok->isAccessSpecifierKeyword()) {
if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
Style.isCSharp()) {
nextToken();
} else {
parseAccessSpecifier();
}
return;
}
switch (FormatTok->Tok.getKind()) {
case tok::kw_asm:
nextToken();
if (FormatTok->is(tok::l_brace)) {
FormatTok->setFinalizedType(TT_InlineASMBrace);
nextToken();
while (FormatTok && !eof()) {
if (FormatTok->is(tok::r_brace)) {
FormatTok->setFinalizedType(TT_InlineASMBrace);
nextToken();
addUnwrappedLine();
break;
}
FormatTok->Finalized = true;
nextToken();
}
}
break;
case tok::kw_namespace:
parseNamespace();
return;
case tok::kw_if: {
if (Style.isJavaScript() && Line->MustBeDeclaration) {
// field/method declaration.
break;
}
FormatToken *Tok = parseIfThenElse(IfKind);
if (IfLeftBrace)
*IfLeftBrace = Tok;
return;
}
case tok::kw_for:
case tok::kw_while:
if (Style.isJavaScript() && Line->MustBeDeclaration) {
// field/method declaration.
break;
}
parseForOrWhileLoop();
return;
case tok::kw_do:
if (Style.isJavaScript() && Line->MustBeDeclaration) {
// field/method declaration.
break;
}
parseDoWhile();
if (HasDoWhile)
*HasDoWhile = true;
return;
case tok::kw_switch:
if (Style.isJavaScript() && Line->MustBeDeclaration) {
// 'switch: string' field declaration.
break;
}
parseSwitch(/*IsExpr=*/false);
return;
case tok::kw_default: {
// In Verilog, default, like other labels, is handled in the next loop.
if (Style.isVerilog())
break;
if (Style.isJavaScript() && Line->MustBeDeclaration) {
// 'default: string' field declaration.
break;
}
auto *Default = FormatTok;
nextToken();
if (FormatTok->is(tok::colon)) {
FormatTok->setFinalizedType(TT_CaseLabelColon);
parseLabel();
return;
}
if (FormatTok->is(tok::arrow)) {
FormatTok->setFinalizedType(TT_CaseLabelArrow);
Default->setFinalizedType(TT_SwitchExpressionLabel);
parseLabel();
return;
}
// e.g. "default void f() {}" in a Java interface.
break;
}
case tok::kw_case:
// Proto: there are no switch/case statements.
if (Style.Language == FormatStyle::LK_Proto) {
nextToken();
return;
}
if (Style.isVerilog()) {
parseBlock();
addUnwrappedLine();
return;
}
if (Style.isJavaScript() && Line->MustBeDeclaration) {
// 'case: string' field declaration.
nextToken();
break;
}
parseCaseLabel();
return;
case tok::kw_try:
case tok::kw___try:
if (Style.isJavaScript() && Line->MustBeDeclaration) {
// field/method declaration.
break;
}
parseTryCatch();
return;
case tok::kw_extern:
nextToken();
if (Style.isVerilog()) {
// In Verilog an extern module declaration looks like the start of a
// module, but there is no body or endmodule, so we handle it separately.
if (Keywords.isVerilogHierarchy(*FormatTok)) {
parseVerilogHierarchyHeader();
return;
}
} else if (FormatTok->is(tok::string_literal)) {
nextToken();
if (FormatTok->is(tok::l_brace)) {
if (Style.BraceWrapping.AfterExternBlock)
addUnwrappedLine();
// Either we indent, or for backwards compatibility, we follow the
// AfterExternBlock style.
unsigned AddLevels =
(Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
(Style.BraceWrapping.AfterExternBlock &&
Style.IndentExternBlock ==
FormatStyle::IEBS_AfterExternBlock)
? 1u
: 0u;
parseBlock(/*MustBeDeclaration=*/true, AddLevels);
addUnwrappedLine();
return;
}
}
break;
case tok::kw_export:
if (Style.isJavaScript()) {
parseJavaScriptEs6ImportExport();
return;
}
if (IsCpp) {
nextToken();
if (FormatTok->is(tok::kw_namespace)) {
parseNamespace();
return;
}
if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
return;
}
break;
case tok::kw_inline:
nextToken();
if (FormatTok->is(tok::kw_namespace)) {
parseNamespace();
return;
}
break;
case tok::identifier:
if (FormatTok->is(TT_ForEachMacro)) {
parseForOrWhileLoop();
return;
}
if (FormatTok->is(TT_MacroBlockBegin)) {
parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
/*MunchSemi=*/false);
return;
}
if (FormatTok->is(Keywords.kw_import)) {
if (Style.isJavaScript()) {
parseJavaScriptEs6ImportExport();
return;
}
if (Style.Language == FormatStyle::LK_Proto) {
nextToken();
if (FormatTok->is(tok::kw_public))
nextToken();
if (FormatTok->isNot(tok::string_literal))
return;
nextToken();
if (FormatTok->is(tok::semi))
nextToken();
addUnwrappedLine();
return;
}
if (IsCpp && parseModuleImport())
return;
}
if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
Keywords.kw_slots, Keywords.kw_qslots)) {
nextToken();
if (FormatTok->is(tok::colon)) {
nextToken();
addUnwrappedLine();
return;
}
}
if (IsCpp && FormatTok->is(TT_StatementMacro)) {
parseStatementMacro();
return;
}
if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
parseNamespace();
return;
}
// In Verilog labels can be any expression, so we don't do them here.
// JS doesn't have macros, and within classes colons indicate fields, not
// labels.
// TableGen doesn't have labels.
if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
nextToken();
if (!Line->InMacroBody || CurrentLines->size() > 1)
Line->Tokens.begin()->Tok->MustBreakBefore = true;
FormatTok->setFinalizedType(TT_GotoLabelColon);
parseLabel(!Style.IndentGotoLabels);
if (HasLabel)
*HasLabel = true;
return;
}
// In all other cases, parse the declaration.
break;
default:
break;
}
for (const bool InRequiresExpression =
OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
!eof();) {
if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
Next && Next->isBinaryOperator()) {
FormatTok->Tok.setKind(tok::identifier);
}
}
const FormatToken *Previous = FormatTok->Previous;
switch (FormatTok->Tok.getKind()) {
case tok::at:
nextToken();
if (FormatTok->is(tok::l_brace)) {
nextToken();
parseBracedList();
break;
} else if (Style.Language == FormatStyle::LK_Java &&
FormatTok->is(Keywords.kw_interface)) {
nextToken();
break;
}
switch (FormatTok->Tok.getObjCKeywordID()) {
case tok::objc_public:
case tok::objc_protected:
case tok::objc_package:
case tok::objc_private:
return parseAccessSpecifier();
case tok::objc_interface:
case tok::objc_implementation:
return parseObjCInterfaceOrImplementation();
case tok::objc_protocol:
if (parseObjCProtocol())
return;
break;
case tok::objc_end:
return; // Handled by the caller.
case tok::objc_optional:
case tok::objc_required:
nextToken();
addUnwrappedLine();
return;
case tok::objc_autoreleasepool:
nextToken();
if (FormatTok->is(tok::l_brace)) {
if (Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always) {
addUnwrappedLine();
}
parseBlock();
}
addUnwrappedLine();
return;
case tok::objc_synchronized:
nextToken();
if (FormatTok->is(tok::l_paren)) {
// Skip synchronization object
parseParens();
}
if (FormatTok->is(tok::l_brace)) {
if (Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always) {
addUnwrappedLine();
}
parseBlock();
}
addUnwrappedLine();
return;
case tok::objc_try:
// This branch isn't strictly necessary (the kw_try case below would
// do this too after the tok::at is parsed above). But be explicit.
parseTryCatch();
return;
default:
break;
}
break;
case tok::kw_requires: {
if (IsCpp) {
bool ParsedClause = parseRequires();
if (ParsedClause)
return;
} else {
nextToken();
}
break;
}
case tok::kw_enum:
// Ignore if this is part of "template <enum ..." or "... -> enum" or
// "template <..., enum ...>".
if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
nextToken();
break;
}
// parseEnum falls through and does not yet add an unwrapped line as an
// enum definition can start a structural element.
if (!parseEnum())
break;
// This only applies to C++ and Verilog.
if (!IsCpp && !Style.isVerilog()) {
addUnwrappedLine();
return;
}
break;
case tok::kw_typedef:
nextToken();
if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
Keywords.kw_CF_CLOSED_ENUM,
Keywords.kw_NS_CLOSED_ENUM)) {
parseEnum();
}
break;
case tok::kw_class:
if (Style.isVerilog()) {
parseBlock();
addUnwrappedLine();
return;
}
if (Style.isTableGen()) {
// Do nothing special. In this case the l_brace becomes FunctionLBrace.
// This is the same as def and so on.
nextToken();
break;
}
[[fallthrough]];
case tok::kw_struct:
case tok::kw_union:
if (parseStructLike())
return;
break;
case tok::kw_decltype:
nextToken();
if (FormatTok->is(tok::l_paren)) {
parseParens();
assert(FormatTok->Previous);
if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
tok::l_paren)) {
Line->SeenDecltypeAuto = true;
}
}
break;
case tok::period:
nextToken();
// In Java, classes have an implicit static member "class".
if (Style.Language == FormatStyle::LK_Java && FormatTok &&
FormatTok->is(tok::kw_class)) {
nextToken();
}
if (Style.isJavaScript() && FormatTok &&
FormatTok->Tok.getIdentifierInfo()) {
// JavaScript only has pseudo keywords; all keywords are allowed to
// appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
nextToken();
}
break;
case tok::semi:
nextToken();
addUnwrappedLine();
return;
case tok::r_brace:
addUnwrappedLine();
return;
case tok::l_paren: {
parseParens();
// Break the unwrapped line if a K&R C function definition has a parameter
// declaration.
if (OpeningBrace || !IsCpp || !Previous || eof())
break;
if (isC78ParameterDecl(FormatTok,
Tokens->peekNextToken(/*SkipComment=*/true),
Previous)) {
addUnwrappedLine();
return;
}
break;
}
case tok::kw_operator:
nextToken();
if (FormatTok->isBinaryOperator())
nextToken();
break;
case tok::caret:
nextToken();
// Block return type.
if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
nextToken();
// Return types: pointers are ok too.
while (FormatTok->is(tok::star))
nextToken();
}
// Block argument list.
if (FormatTok->is(tok::l_paren))
parseParens();
// Block body.
if (FormatTok->is(tok::l_brace))
parseChildBlock();
break;
case tok::l_brace:
if (InRequiresExpression)
FormatTok->setFinalizedType(TT_BracedListLBrace);
if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
// A block outside of parentheses must be the last part of a
// structural element.
// FIXME: Figure out cases where this is not true, and add projections
// for them (the ones we know are missing are lambdas).
if (Style.Language == FormatStyle::LK_Java &&
Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
// If necessary, we could set the type to something different than
// TT_FunctionLBrace.
if (Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always) {
addUnwrappedLine();
}
} else if (Style.BraceWrapping.AfterFunction) {
addUnwrappedLine();
}
if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
FormatTok->setFinalizedType(TT_FunctionLBrace);
parseBlock();
IsDecltypeAutoFunction = false;
addUnwrappedLine();
return;
}
// Otherwise this was a braced init list, and the structural
// element continues.
break;
case tok::kw_try:
if (Style.isJavaScript() && Line->MustBeDeclaration) {
// field/method declaration.
nextToken();
break;
}
// We arrive here when parsing function-try blocks.
if (Style.BraceWrapping.AfterFunction)
addUnwrappedLine();
parseTryCatch();
return;
case tok::identifier: {
if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
Line->MustBeDeclaration) {
addUnwrappedLine();
parseCSharpGenericTypeConstraint();
break;
}
if (FormatTok->is(TT_MacroBlockEnd)) {
addUnwrappedLine();
return;
}
// Function declarations (as opposed to function expressions) are parsed
// on their own unwrapped line by continuing this loop. Function
// expressions (functions that are not on their own line) must not create
// a new unwrapped line, so they are special cased below.
size_t TokenCount = Line->Tokens.size();
if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
(TokenCount > 1 ||
(TokenCount == 1 &&
Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
tryToParseJSFunction();
break;
}
if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
FormatTok->is(Keywords.kw_interface)) {
if (Style.isJavaScript()) {
// In JavaScript/TypeScript, "interface" can be used as a standalone
// identifier, e.g. in `var interface = 1;`. If "interface" is
// followed by another identifier, it is very likely to be an actual
// interface declaration.
unsigned StoredPosition = Tokens->getPosition();
FormatToken *Next = Tokens->getNextToken();
FormatTok = Tokens->setPosition(StoredPosition);
if (!mustBeJSIdent(Keywords, Next)) {
nextToken();
break;
}
}
parseRecord();
addUnwrappedLine();
return;
}
if (Style.isVerilog()) {
if (FormatTok->is(Keywords.kw_table)) {
parseVerilogTable();
return;
}
if (Keywords.isVerilogBegin(*FormatTok) ||
Keywords.isVerilogHierarchy(*FormatTok)) {
parseBlock();
addUnwrappedLine();
return;
}
}
if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
if (parseStructLike())
return;
break;
}
if (IsCpp && FormatTok->is(TT_StatementMacro)) {
parseStatementMacro();
return;
}
// See if the following token should start a new unwrapped line.
StringRef Text = FormatTok->TokenText;
FormatToken *PreviousToken = FormatTok;
nextToken();
// JS doesn't have macros, and within classes colons indicate fields, not
// labels.
if (Style.isJavaScript())
break;
auto OneTokenSoFar = [&]() {
auto I = Line->Tokens.begin(), E = Line->Tokens.end();
while (I != E && I->Tok->is(tok::comment))
++I;
if (Style.isVerilog())
while (I != E && I->Tok->is(tok::hash))
++I;
return I != E && (++I == E);
};
if (OneTokenSoFar()) {
// Recognize function-like macro usages without trailing semicolon as
// well as free-standing macros like Q_OBJECT.
bool FunctionLike = FormatTok->is(tok::l_paren);
if (FunctionLike)
parseParens();
bool FollowedByNewline =
CommentsBeforeNextToken.empty()
? FormatTok->NewlinesBefore > 0
: CommentsBeforeNextToken.front()->NewlinesBefore > 0;
if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
if (PreviousToken->isNot(TT_UntouchableMacroFunc))
PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
addUnwrappedLine();
return;
}
}
break;
}
case tok::equal:
if ((Style.isJavaScript() || Style.isCSharp()) &&
FormatTok->is(TT_FatArrow)) {
tryToParseChildBlock();
break;
}
nextToken();
if (FormatTok->is(tok::l_brace)) {
// Block kind should probably be set to BK_BracedInit for any language.
// C# needs this change to ensure that array initialisers and object
// initialisers are indented the same way.
if (Style.isCSharp())
FormatTok->setBlockKind(BK_BracedInit);
// TableGen's defset statement has syntax of the form
// `defset <type> <name> = { <statement>... }`.
if (Style.isTableGen() &&
Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
FormatTok->setFinalizedType(TT_FunctionLBrace);
parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
/*MunchSemi=*/false);
addUnwrappedLine();
break;
}
nextToken();
parseBracedList();
} else if (Style.Language == FormatStyle::LK_Proto &&
FormatTok->is(tok::less)) {
nextToken();
parseBracedList(/*IsAngleBracket=*/true);
}
break;
case tok::l_square:
parseSquare();
break;
case tok::kw_new:
parseNew();
break;
case tok::kw_switch:
if (Style.Language == FormatStyle::LK_Java)
parseSwitch(/*IsExpr=*/true);
nextToken();
break;
case tok::kw_case:
// Proto: there are no switch/case statements.
if (Style.Language == FormatStyle::LK_Proto) {
nextToken();
return;
}
// In Verilog switch is called case.
if (Style.isVerilog()) {
parseBlock();
addUnwrappedLine();
return;
}
if (Style.isJavaScript() && Line->MustBeDeclaration) {
// 'case: string' field declaration.
nextToken();
break;
}
parseCaseLabel();
break;
case tok::kw_default:
nextToken();
if (Style.isVerilog()) {
if (FormatTok->is(tok::colon)) {
// The label will be handled in the next iteration.
break;
}
if (FormatTok->is(Keywords.kw_clocking)) {
// A default clocking block.
parseBlock();
addUnwrappedLine();
return;
}
parseVerilogCaseLabel();
return;
}
break;
case tok::colon:
nextToken();
if (Style.isVerilog()) {
parseVerilogCaseLabel();
return;
}
break;
default:
nextToken();
break;
}
}
}
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
assert(FormatTok->is(tok::l_brace));
if (!Style.isCSharp())
return false;
// See if it's a property accessor.
if (FormatTok->Previous->isNot(tok::identifier))
return false;
// See if we are inside a property accessor.
//
// Record the current tokenPosition so that we can advance and
// reset the current token. `Next` is not set yet so we need
// another way to advance along the token stream.
unsigned int StoredPosition = Tokens->getPosition();
FormatToken *Tok = Tokens->getNextToken();
// A trivial property accessor is of the form:
// { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
// Track these as they do not require line breaks to be introduced.
bool HasSpecialAccessor = false;
bool IsTrivialPropertyAccessor = true;
while (!eof()) {
if (Tok->isAccessSpecifierKeyword() ||
Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get,
Keywords.kw_init, Keywords.kw_set)) {
if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
HasSpecialAccessor = true;
Tok = Tokens->getNextToken();
continue;
}
if (Tok->isNot(tok::r_brace))
IsTrivialPropertyAccessor = false;
break;
}
if (!HasSpecialAccessor) {
Tokens->setPosition(StoredPosition);
return false;
}
// Try to parse the property accessor:
// https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
Tokens->setPosition(StoredPosition);
if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
addUnwrappedLine();
nextToken();
do {
switch (FormatTok->Tok.getKind()) {
case tok::r_brace:
nextToken();
if (FormatTok->is(tok::equal)) {
while (!eof() && FormatTok->isNot(tok::semi))
nextToken();
nextToken();
}
addUnwrappedLine();
return true;
case tok::l_brace:
++Line->Level;
parseBlock(/*MustBeDeclaration=*/true);
addUnwrappedLine();
--Line->Level;
break;
case tok::equal:
if (FormatTok->is(TT_FatArrow)) {
++Line->Level;
do {
nextToken();
} while (!eof() && FormatTok->isNot(tok::semi));
nextToken();
addUnwrappedLine();
--Line->Level;
break;
}
nextToken();
break;
default:
if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
Keywords.kw_set) &&
!IsTrivialPropertyAccessor) {
// Non-trivial get/set needs to be on its own line.
addUnwrappedLine();
}
nextToken();
}
} while (!eof());
// Unreachable for well-formed code (paired '{' and '}').
return true;
}
bool UnwrappedLineParser::tryToParseLambda() {
assert(FormatTok->is(tok::l_square));
if (!IsCpp) {
nextToken();
return false;
}
FormatToken &LSquare = *FormatTok;
if (!tryToParseLambdaIntroducer())
return false;
bool SeenArrow = false;
bool InTemplateParameterList = false;
while (FormatTok->isNot(tok::l_brace)) {
if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
nextToken();
continue;
}
switch (FormatTok->Tok.getKind()) {
case tok::l_brace:
break;
case tok::l_paren:
parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
break;
case tok::l_square:
parseSquare();
break;
case tok::less:
assert(FormatTok->Previous);
if (FormatTok->Previous->is(tok::r_square))
InTemplateParameterList = true;
nextToken();
break;
case tok::kw_auto:
case tok::kw_class:
case tok::kw_struct:
case tok::kw_union:
case tok::kw_template:
case tok::kw_typename:
case tok::amp:
case tok::star:
case tok::kw_const:
case tok::kw_constexpr:
case tok::kw_consteval:
case tok::comma:
case tok::greater:
case tok::identifier:
case tok::numeric_constant:
case tok::coloncolon:
case tok::kw_mutable:
case tok::kw_noexcept:
case tok::kw_static:
nextToken();
break;
// Specialization of a template with an integer parameter can contain
// arithmetic, logical, comparison and ternary operators.
//
// FIXME: This also accepts sequences of operators that are not in the scope
// of a template argument list.
//
// In a C++ lambda a template type can only occur after an arrow. We use
// this as a heuristic to distinguish between Objective-C expressions
// followed by an `a->b` expression, such as:
// ([obj func:arg] + a->b)
// Otherwise the code below would parse as a lambda.
case tok::plus:
case tok::minus:
case tok::exclaim:
case tok::tilde:
case tok::slash:
case tok::percent:
case tok::lessless:
case tok::pipe:
case tok::pipepipe:
case tok::ampamp:
case tok::caret:
case tok::equalequal:
case tok::exclaimequal:
case tok::greaterequal:
case tok::lessequal:
case tok::question:
case tok::colon:
case tok::ellipsis:
case tok::kw_true:
case tok::kw_false:
if (SeenArrow || InTemplateParameterList) {
nextToken();
break;
}
return true;
case tok::arrow:
// This might or might not actually be a lambda arrow (this could be an
// ObjC method invocation followed by a dereferencing arrow). We might
// reset this back to TT_Unknown in TokenAnnotator.
FormatTok->setFinalizedType(TT_LambdaArrow);
SeenArrow = true;
nextToken();
break;
case tok::kw_requires: {
auto *RequiresToken = FormatTok;
nextToken();
parseRequiresClause(RequiresToken);
break;
}
case tok::equal:
if (!InTemplateParameterList)
return true;
nextToken();
break;
default:
return true;
}
}
FormatTok->setFinalizedType(TT_LambdaLBrace);
LSquare.setFinalizedType(TT_LambdaLSquare);
NestedLambdas.push_back(Line->SeenDecltypeAuto);
parseChildBlock();
assert(!NestedLambdas.empty());
NestedLambdas.pop_back();
return true;
}
bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
const FormatToken *Previous = FormatTok->Previous;
const FormatToken *LeftSquare = FormatTok;
nextToken();
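// Illustrative: a '[' is a subscript rather than a lambda introducer in
//   arr[i]    // preceded by an identifier
//   f()[0]    // preceded by a token that closes a scope
// but does introduce a lambda in, e.g., `return [x] { ... };`.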
if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
!Previous->isOneOf(tok::kw_return, tok::kw_co_await,
tok::kw_co_yield, tok::kw_co_return)) ||
Previous->closesScope())) ||
LeftSquare->isCppStructuredBinding(IsCpp)) {
return false;
}
if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
return false;
if (FormatTok->is(tok::r_square)) {
const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
if (Next->is(tok::greater))
return false;
}
parseSquare(/*LambdaIntroducer=*/true);
return true;
}
void UnwrappedLineParser::tryToParseJSFunction() {
assert(FormatTok->is(Keywords.kw_function));
if (FormatTok->is(Keywords.kw_async))
nextToken();
// Consume "function".
nextToken();
// Consume * (generator function). Treat it like C++'s overloaded operators.
if (FormatTok->is(tok::star)) {
FormatTok->setFinalizedType(TT_OverloadedOperator);
nextToken();
}
// Consume function name.
if (FormatTok->is(tok::identifier))
nextToken();
if (FormatTok->isNot(tok::l_paren))
return;
// Parse formal parameter list.
parseParens();
if (FormatTok->is(tok::colon)) {
// Parse a type definition.
nextToken();
// Eat the type declaration. For braced inline object types, balance braces;
// otherwise just parse until finding an l_brace for the function body.
if (FormatTok->is(tok::l_brace))
tryToParseBracedList();
else
while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
nextToken();
}
if (FormatTok->is(tok::semi))
return;
parseChildBlock();
}
bool UnwrappedLineParser::tryToParseBracedList() {
if (FormatTok->is(BK_Unknown))
calculateBraceTypes();
assert(FormatTok->isNot(BK_Unknown));
if (FormatTok->is(BK_Block))
return false;
nextToken();
parseBracedList();
return true;
}
bool UnwrappedLineParser::tryToParseChildBlock() {
assert(Style.isJavaScript() || Style.isCSharp());
assert(FormatTok->is(TT_FatArrow));
// Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
// They always start an expression or a child block if followed by a curly
// brace.
nextToken();
if (FormatTok->isNot(tok::l_brace))
return false;
parseChildBlock();
return true;
}
bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
assert(!IsAngleBracket || !IsEnum);
bool HasError = false;
// FIXME: Once we have an expression parser in the UnwrappedLineParser,
// replace this by using parseAssignmentExpression() inside.
do {
if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
tryToParseChildBlock()) {
continue;
}
if (Style.isJavaScript()) {
if (FormatTok->is(Keywords.kw_function)) {
tryToParseJSFunction();
continue;
}
if (FormatTok->is(tok::l_brace)) {
// Could be a method inside of a braced list `{a() { return 1; }}`.
if (tryToParseBracedList())
continue;
parseChildBlock();
}
}
if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
if (IsEnum) {
FormatTok->setBlockKind(BK_Block);
if (!Style.AllowShortEnumsOnASingleLine)
addUnwrappedLine();
}
nextToken();
return !HasError;
}
switch (FormatTok->Tok.getKind()) {
case tok::l_square:
if (Style.isCSharp())
parseSquare();
else
tryToParseLambda();
break;
case tok::l_paren:
parseParens();
// JavaScript can have free-standing methods and getters/setters in
// object literals. Detect them by a "{" following ")".
if (Style.isJavaScript()) {
if (FormatTok->is(tok::l_brace))
parseChildBlock();
break;
}
break;
case tok::l_brace:
// Assume there are no blocks inside a braced init list apart
// from the ones we explicitly parse out (like lambdas).
FormatTok->setBlockKind(BK_BracedInit);
nextToken();
parseBracedList();
break;
case tok::less:
nextToken();
if (IsAngleBracket)
parseBracedList(/*IsAngleBracket=*/true);
break;
case tok::semi:
// JavaScript (or more precisely TypeScript) can have semicolons in braced
// lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
// used for error recovery if we have otherwise determined that this is
// a braced list.
if (Style.isJavaScript()) {
nextToken();
break;
}
HasError = true;
if (!IsEnum)
return false;
nextToken();
break;
case tok::comma:
nextToken();
if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
addUnwrappedLine();
break;
default:
nextToken();
break;
}
} while (!eof());
return false;
}
/// \brief Parses a pair of parentheses (and everything between them).
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
/// double ampersands. This applies for all nested scopes as well.
///
/// Returns whether there is a `=` token between the parentheses.
bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
assert(FormatTok->is(tok::l_paren) && "'(' expected.");
auto *LeftParen = FormatTok;
bool SeenEqual = false;
bool MightBeFoldExpr = false;
const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
nextToken();
do {
switch (FormatTok->Tok.getKind()) {
case tok::l_paren:
if (parseParens(AmpAmpTokenType))
SeenEqual = true;
if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
parseChildBlock();
break;
case tok::r_paren: {
const auto *Prev = LeftParen->Previous;
if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
Style.RemoveParentheses > FormatStyle::RPS_Leave) {
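// Redundant parentheses can be marked optional here, e.g. (illustrative):
//   ((x + y))   -> (x + y)   // DoubleParens
//   return (x); -> return x; // ReturnParens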
const auto *Next = Tokens->peekNextToken();
const bool DoubleParens =
Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
const bool Blacklisted =
PrevPrev &&
(PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
(SeenEqual &&
(PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
const bool ReturnParens =
Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
(!NestedLambdas.empty() && !NestedLambdas.back())) &&
Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
Next->is(tok::semi);
if ((DoubleParens && !Blacklisted) || ReturnParens) {
LeftParen->Optional = true;
FormatTok->Optional = true;
}
}
if (Prev && Prev->is(TT_TypenameMacro)) {
LeftParen->setFinalizedType(TT_TypeDeclarationParen);
FormatTok->setFinalizedType(TT_TypeDeclarationParen);
}
nextToken();
return SeenEqual;
}
case tok::r_brace:
// A "}" inside parenthesis is an error if there wasn't a matching "{".
return SeenEqual;
case tok::l_square:
tryToParseLambda();
break;
case tok::l_brace:
if (!tryToParseBracedList())
parseChildBlock();
break;
case tok::at:
nextToken();
if (FormatTok->is(tok::l_brace)) {
nextToken();
parseBracedList();
}
break;
case tok::ellipsis:
MightBeFoldExpr = true;
nextToken();
break;
case tok::equal:
SeenEqual = true;
if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
tryToParseChildBlock();
else
nextToken();
break;
case tok::kw_class:
if (Style.isJavaScript())
parseRecord(/*ParseAsExpr=*/true);
else
nextToken();
break;
case tok::identifier:
if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
tryToParseJSFunction();
else
nextToken();
break;
case tok::kw_switch:
parseSwitch(/*IsExpr=*/true);
break;
case tok::kw_requires: {
auto RequiresToken = FormatTok;
nextToken();
parseRequiresExpression(RequiresToken);
break;
}
case tok::ampamp:
if (AmpAmpTokenType != TT_Unknown)
FormatTok->setFinalizedType(AmpAmpTokenType);
[[fallthrough]];
default:
nextToken();
break;
}
} while (!eof());
return SeenEqual;
}
void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
if (!LambdaIntroducer) {
assert(FormatTok->is(tok::l_square) && "'[' expected.");
if (tryToParseLambda())
return;
}
do {
switch (FormatTok->Tok.getKind()) {
case tok::l_paren:
parseParens();
break;
case tok::r_square:
nextToken();
return;
case tok::r_brace:
// A "}" inside parenthesis is an error if there wasn't a matching "{".
return;
case tok::l_square:
parseSquare();
break;
case tok::l_brace: {
if (!tryToParseBracedList())
parseChildBlock();
break;
}
case tok::at:
case tok::colon:
nextToken();
if (FormatTok->is(tok::l_brace)) {
nextToken();
parseBracedList();
}
break;
default:
nextToken();
break;
}
} while (!eof());
}
void UnwrappedLineParser::keepAncestorBraces() {
if (!Style.RemoveBracesLLVM)
return;
const int MaxNestingLevels = 2;
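// Illustrative: once control statements nest more than MaxNestingLevels
// deep, the ancestor MaxNestingLevels up is marked as too deep, so its
// braces are kept even where RemoveBracesLLVM could otherwise remove them.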
const int Size = NestedTooDeep.size();
if (Size >= MaxNestingLevels)
NestedTooDeep[Size - MaxNestingLevels] = true;
NestedTooDeep.push_back(false);
}
static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
for (const auto &Token : llvm::reverse(Line.Tokens))
if (Token.Tok->isNot(tok::comment))
return Token.Tok;
return nullptr;
}
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
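// A sketch of the InsertBraces bookkeeping below (illustrative): a
// BraceCount of -1 on the token preceding an unbraced body requests an
// opening brace after it, and incrementing BraceCount on the body's last
// token requests the matching closing brace.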
FormatToken *Tok = nullptr;
if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
? getLastNonComment(*Line)
: Line->Tokens.back().Tok;
assert(Tok);
if (Tok->BraceCount < 0) {
assert(Tok->BraceCount == -1);
Tok = nullptr;
} else {
Tok->BraceCount = -1;
}
}
addUnwrappedLine();
++Line->Level;
++Line->UnbracedBodyLevel;
parseStructuralElement();
--Line->UnbracedBodyLevel;
if (Tok) {
assert(!Line->InPPDirective);
Tok = nullptr;
for (const auto &L : llvm::reverse(*CurrentLines)) {
if (!L.InPPDirective && getLastNonComment(L)) {
Tok = L.Tokens.back().Tok;
break;
}
}
assert(Tok);
++Tok->BraceCount;
}
if (CheckEOF && eof())
addUnwrappedLine();
--Line->Level;
}
static void markOptionalBraces(FormatToken *LeftBrace) {
if (!LeftBrace)
return;
assert(LeftBrace->is(tok::l_brace));
FormatToken *RightBrace = LeftBrace->MatchingParen;
if (!RightBrace) {
assert(!LeftBrace->Optional);
return;
}
assert(RightBrace->is(tok::r_brace));
assert(RightBrace->MatchingParen == LeftBrace);
assert(LeftBrace->Optional == RightBrace->Optional);
LeftBrace->Optional = true;
RightBrace->Optional = true;
}
void UnwrappedLineParser::handleAttributes() {
// Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
if (FormatTok->isAttribute())
nextToken();
else if (FormatTok->is(tok::l_square))
handleCppAttributes();
}
bool UnwrappedLineParser::handleCppAttributes() {
// Handle [[likely]] / [[unlikely]] attributes.
assert(FormatTok->is(tok::l_square));
if (!tryToParseSimpleAttribute())
return false;
parseSquare();
return true;
}
/// Returns whether \c Tok begins a block.
bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
// FIXME: rename the function or make
// Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
: Tok.is(tok::l_brace);
}
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
bool KeepBraces,
bool IsVerilogAssert) {
assert((FormatTok->is(tok::kw_if) ||
(Style.isVerilog() &&
FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
Keywords.kw_assume, Keywords.kw_cover))) &&
"'if' expected");
nextToken();
if (IsVerilogAssert) {
// Handle `assert #0` and `assert final`.
if (FormatTok->is(Keywords.kw_verilogHash)) {
nextToken();
if (FormatTok->is(tok::numeric_constant))
nextToken();
} else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
Keywords.kw_sequence)) {
nextToken();
}
}
// TableGen's if statement has the form of `if <cond> then { ... }`.
if (Style.isTableGen()) {
while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
// Simply skip until 'then'. This range only contains a value.
nextToken();
}
}
// Handle `if !consteval`.
if (FormatTok->is(tok::exclaim))
nextToken();
bool KeepIfBraces = true;
if (FormatTok->is(tok::kw_consteval)) {
nextToken();
} else {
KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
nextToken();
if (FormatTok->is(tok::l_paren)) {
FormatTok->setFinalizedType(TT_ConditionLParen);
parseParens();
}
}
handleAttributes();
// The then action is optional in Verilog assert statements.
if (IsVerilogAssert && FormatTok->is(tok::semi)) {
nextToken();
addUnwrappedLine();
return nullptr;
}
bool NeedsUnwrappedLine = false;
keepAncestorBraces();
FormatToken *IfLeftBrace = nullptr;
IfStmtKind IfBlockKind = IfStmtKind::NotIf;
if (isBlockBegin(*FormatTok)) {
FormatTok->setFinalizedType(TT_ControlStatementLBrace);
IfLeftBrace = FormatTok;
CompoundStatementIndenter Indenter(this, Style, Line->Level);
parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
/*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
setPreviousRBraceType(TT_ControlStatementRBrace);
if (Style.BraceWrapping.BeforeElse)
addUnwrappedLine();
else
NeedsUnwrappedLine = true;
} else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
addUnwrappedLine();
} else {
parseUnbracedBody();
}
if (Style.RemoveBracesLLVM) {
assert(!NestedTooDeep.empty());
KeepIfBraces = KeepIfBraces ||
(IfLeftBrace && !IfLeftBrace->MatchingParen) ||
NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
IfBlockKind == IfStmtKind::IfElseIf;
}
bool KeepElseBraces = KeepIfBraces;
FormatToken *ElseLeftBrace = nullptr;
IfStmtKind Kind = IfStmtKind::IfOnly;
if (FormatTok->is(tok::kw_else)) {
if (Style.RemoveBracesLLVM) {
NestedTooDeep.back() = false;
Kind = IfStmtKind::IfElse;
}
nextToken();
handleAttributes();
if (isBlockBegin(*FormatTok)) {
const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
FormatTok->setFinalizedType(TT_ElseLBrace);
ElseLeftBrace = FormatTok;
CompoundStatementIndenter Indenter(this, Style, Line->Level);
IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
FormatToken *IfLBrace =
parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
/*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
setPreviousRBraceType(TT_ElseRBrace);
if (FormatTok->is(tok::kw_else)) {
KeepElseBraces = KeepElseBraces ||
ElseBlockKind == IfStmtKind::IfOnly ||
ElseBlockKind == IfStmtKind::IfElseIf;
} else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
KeepElseBraces = true;
assert(ElseLeftBrace->MatchingParen);
markOptionalBraces(ElseLeftBrace);
}
addUnwrappedLine();
} else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
const FormatToken *Previous = Tokens->getPreviousToken();
assert(Previous);
const bool IsPrecededByComment = Previous->is(tok::comment);
if (IsPrecededByComment) {
addUnwrappedLine();
++Line->Level;
}
bool TooDeep = true;
if (Style.RemoveBracesLLVM) {
Kind = IfStmtKind::IfElseIf;
TooDeep = NestedTooDeep.pop_back_val();
}
ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
if (Style.RemoveBracesLLVM)
NestedTooDeep.push_back(TooDeep);
if (IsPrecededByComment)
--Line->Level;
} else {
parseUnbracedBody(/*CheckEOF=*/true);
}
} else {
KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
if (NeedsUnwrappedLine)
addUnwrappedLine();
}
if (!Style.RemoveBracesLLVM)
return nullptr;
assert(!NestedTooDeep.empty());
KeepElseBraces = KeepElseBraces ||
(ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
NestedTooDeep.back();
NestedTooDeep.pop_back();
if (!KeepIfBraces && !KeepElseBraces) {
markOptionalBraces(IfLeftBrace);
markOptionalBraces(ElseLeftBrace);
} else if (IfLeftBrace) {
FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
if (IfRightBrace) {
assert(IfRightBrace->MatchingParen == IfLeftBrace);
assert(!IfLeftBrace->Optional);
assert(!IfRightBrace->Optional);
IfLeftBrace->MatchingParen = nullptr;
IfRightBrace->MatchingParen = nullptr;
}
}
if (IfKind)
*IfKind = Kind;
return IfLeftBrace;
}
void UnwrappedLineParser::parseTryCatch() {
assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
nextToken();
bool NeedsUnwrappedLine = false;
bool HasCtorInitializer = false;
if (FormatTok->is(tok::colon)) {
auto *Colon = FormatTok;
// We are in a function try block; what follows is an initializer list.
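// e.g. a constructor function-try-block (illustrative):
//   Foo::Foo() try : member_(0) { ... } catch (...) { ... }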
nextToken();
if (FormatTok->is(tok::identifier)) {
HasCtorInitializer = true;
Colon->setFinalizedType(TT_CtorInitializerColon);
}
// In case identifiers were removed by clang-tidy, what might follow is
// multiple commas in sequence - before the first identifier.
while (FormatTok->is(tok::comma))
nextToken();
while (FormatTok->is(tok::identifier)) {
nextToken();
if (FormatTok->is(tok::l_paren)) {
parseParens();
} else if (FormatTok->is(tok::l_brace)) {
nextToken();
parseBracedList();
}
// In case identifiers were removed by clang-tidy, what might follow is
// multiple commas in sequence - after the first identifier.
while (FormatTok->is(tok::comma))
nextToken();
}
}
// Parse try with resource.
if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
parseParens();
keepAncestorBraces();
if (FormatTok->is(tok::l_brace)) {
if (HasCtorInitializer)
FormatTok->setFinalizedType(TT_FunctionLBrace);
CompoundStatementIndenter Indenter(this, Style, Line->Level);
parseBlock();
if (Style.BraceWrapping.BeforeCatch)
addUnwrappedLine();
else
NeedsUnwrappedLine = true;
} else if (FormatTok->isNot(tok::kw_catch)) {
// The C++ standard requires a compound-statement after a try.
// If there's none, we assume there's a structural element
// and try to continue.
addUnwrappedLine();
++Line->Level;
parseStructuralElement();
--Line->Level;
}
while (true) {
if (FormatTok->is(tok::at))
nextToken();
if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
tok::kw___finally) ||
((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
FormatTok->is(Keywords.kw_finally)) ||
(FormatTok->isObjCAtKeyword(tok::objc_catch) ||
FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
break;
}
nextToken();
while (FormatTok->isNot(tok::l_brace)) {
if (FormatTok->is(tok::l_paren)) {
parseParens();
continue;
}
if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
if (Style.RemoveBracesLLVM)
NestedTooDeep.pop_back();
return;
}
nextToken();
}
NeedsUnwrappedLine = false;
Line->MustBeDeclaration = false;
CompoundStatementIndenter Indenter(this, Style, Line->Level);
parseBlock();
if (Style.BraceWrapping.BeforeCatch)
addUnwrappedLine();
else
NeedsUnwrappedLine = true;
}
if (Style.RemoveBracesLLVM)
NestedTooDeep.pop_back();
if (NeedsUnwrappedLine)
addUnwrappedLine();
}
void UnwrappedLineParser::parseNamespace() {
assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
"'namespace' expected");
const FormatToken &InitialToken = *FormatTok;
nextToken();
if (InitialToken.is(TT_NamespaceMacro)) {
parseParens();
} else {
while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
tok::l_square, tok::period, tok::l_paren) ||
(Style.isCSharp() && FormatTok->is(tok::kw_union))) {
if (FormatTok->is(tok::l_square))
parseSquare();
else if (FormatTok->is(tok::l_paren))
parseParens();
else
nextToken();
}
}
if (FormatTok->is(tok::l_brace)) {
FormatTok->setFinalizedType(TT_NamespaceLBrace);
if (ShouldBreakBeforeBrace(Style, InitialToken))
addUnwrappedLine();
unsigned AddLevels =
Style.NamespaceIndentation == FormatStyle::NI_All ||
(Style.NamespaceIndentation == FormatStyle::NI_Inner &&
DeclarationScopeStack.size() > 1)
? 1u
: 0u;
bool ManageWhitesmithsBraces =
AddLevels == 0u &&
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
// If we're in Whitesmiths mode, indent the brace if we're not indenting
// the whole block.
if (ManageWhitesmithsBraces)
++Line->Level;
// Munch the semicolon after a namespace. This is more common than one would
// think. Putting the semicolon into its own line is very ugly.
parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
/*KeepBraces=*/true, /*IfKind=*/nullptr,
ManageWhitesmithsBraces);
addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
if (ManageWhitesmithsBraces)
--Line->Level;
}
// FIXME: Add error handling.
}
void UnwrappedLineParser::parseNew() {
assert(FormatTok->is(tok::kw_new) && "'new' expected");
nextToken();
if (Style.isCSharp()) {
do {
// Handle constructor invocation, e.g. `new(field: value)`.
if (FormatTok->is(tok::l_paren))
parseParens();
// Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
if (FormatTok->is(tok::l_brace))
parseBracedList();
if (FormatTok->isOneOf(tok::semi, tok::comma))
return;
nextToken();
} while (!eof());
}
if (Style.Language != FormatStyle::LK_Java)
return;
// In Java, we can parse everything up to the parens, which aren't optional.
do {
// There should not be a ;, { or } before the new's open paren.
if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
return;
// Consume the parens.
if (FormatTok->is(tok::l_paren)) {
parseParens();
// If there is a class body of an anonymous class, consume that as child.
if (FormatTok->is(tok::l_brace))
parseChildBlock();
return;
}
nextToken();
} while (!eof());
}
void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
keepAncestorBraces();
if (isBlockBegin(*FormatTok)) {
FormatTok->setFinalizedType(TT_ControlStatementLBrace);
FormatToken *LeftBrace = FormatTok;
CompoundStatementIndenter Indenter(this, Style, Line->Level);
parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
/*MunchSemi=*/true, KeepBraces);
setPreviousRBraceType(TT_ControlStatementRBrace);
if (!KeepBraces) {
assert(!NestedTooDeep.empty());
if (!NestedTooDeep.back())
markOptionalBraces(LeftBrace);
}
if (WrapRightBrace)
addUnwrappedLine();
} else {
parseUnbracedBody();
}
if (!KeepBraces)
NestedTooDeep.pop_back();
}
void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
(Style.isVerilog() &&
FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
Keywords.kw_always_ff, Keywords.kw_always_latch,
Keywords.kw_final, Keywords.kw_initial,
Keywords.kw_foreach, Keywords.kw_forever,
Keywords.kw_repeat))) &&
"'for', 'while' or foreach macro expected");
const bool KeepBraces = !Style.RemoveBracesLLVM ||
!FormatTok->isOneOf(tok::kw_for, tok::kw_while);
nextToken();
// JS' for await ( ...
if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
nextToken();
if (IsCpp && FormatTok->is(tok::kw_co_await))
nextToken();
if (HasParens && FormatTok->is(tok::l_paren)) {
// The type is only set for Verilog basically because we were afraid to
// change the existing behavior for loops. See the discussion on D121756 for
// details.
if (Style.isVerilog())
FormatTok->setFinalizedType(TT_ConditionLParen);
parseParens();
}
if (Style.isVerilog()) {
// Event control.
parseVerilogSensitivityList();
} else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
Tokens->getPreviousToken()->is(tok::r_paren)) {
nextToken();
addUnwrappedLine();
return;
}
handleAttributes();
parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
}
void UnwrappedLineParser::parseDoWhile() {
assert(FormatTok->is(tok::kw_do) && "'do' expected");
nextToken();
parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
// FIXME: Add error handling.
if (FormatTok->isNot(tok::kw_while)) {
addUnwrappedLine();
return;
}
FormatTok->setFinalizedType(TT_DoWhile);
// If in Whitesmiths mode, the line with the while() needs to be indented
// to the same level as the block.
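// A rough sketch of the intended layout (illustrative; the exact indentation
// depends on the style options):
//   do
//       {
//       work();
//       }
//       while (more());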
if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
++Line->Level;
nextToken();
parseStructuralElement();
}
void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
nextToken();
unsigned OldLineLevel = Line->Level;
if (LeftAlignLabel)
Line->Level = 0;
else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
--Line->Level;
if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
FormatTok->is(tok::l_brace)) {
CompoundStatementIndenter Indenter(this, Line->Level,
Style.BraceWrapping.AfterCaseLabel,
Style.BraceWrapping.IndentBraces);
parseBlock();
if (FormatTok->is(tok::kw_break)) {
if (Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always) {
addUnwrappedLine();
if (!Style.IndentCaseBlocks &&
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
++Line->Level;
}
}
parseStructuralElement();
}
addUnwrappedLine();
} else {
if (FormatTok->is(tok::semi))
nextToken();
addUnwrappedLine();
}
Line->Level = OldLineLevel;
if (FormatTok->isNot(tok::l_brace)) {
parseStructuralElement();
addUnwrappedLine();
}
}
void UnwrappedLineParser::parseCaseLabel() {
assert(FormatTok->is(tok::kw_case) && "'case' expected");
auto *Case = FormatTok;
// FIXME: fix handling of complex expressions here.
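// e.g. `case 1:` finalizes the colon below, while Java's
// `case MONDAY -> doWork();` marks a switch-expression label
// (illustrative examples, not from the original source).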
do {
nextToken();
if (FormatTok->is(tok::colon)) {
FormatTok->setFinalizedType(TT_CaseLabelColon);
break;
}
if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) {
FormatTok->setFinalizedType(TT_CaseLabelArrow);
Case->setFinalizedType(TT_SwitchExpressionLabel);
break;
}
} while (!eof());
parseLabel();
}
void UnwrappedLineParser::parseSwitch(bool IsExpr) {
assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
nextToken();
if (FormatTok->is(tok::l_paren))
parseParens();
keepAncestorBraces();
if (FormatTok->is(tok::l_brace)) {
CompoundStatementIndenter Indenter(this, Style, Line->Level);
FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
: TT_ControlStatementLBrace);
if (IsExpr)
parseChildBlock();
else
parseBlock();
setPreviousRBraceType(TT_ControlStatementRBrace);
if (!IsExpr)
addUnwrappedLine();
} else {
addUnwrappedLine();
++Line->Level;
parseStructuralElement();
--Line->Level;
}
if (Style.RemoveBracesLLVM)
NestedTooDeep.pop_back();
}
// Operators that can follow a C variable.
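// Used by parseAccessSpecifier() to tell a real access specifier such as
// `private:` apart from C code that uses the same word as a variable, e.g.
// `private = 1;` (illustrative example, not from the original source).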
static bool isCOperatorFollowingVar(tok::TokenKind Kind) {
switch (Kind) {
case tok::ampamp:
case tok::ampequal:
case tok::arrow:
case tok::caret:
case tok::caretequal:
case tok::comma:
case tok::ellipsis:
case tok::equal:
case tok::equalequal:
case tok::exclaim:
case tok::exclaimequal:
case tok::greater:
case tok::greaterequal:
case tok::greatergreater:
case tok::greatergreaterequal:
case tok::l_paren:
case tok::l_square:
case tok::less:
case tok::lessequal:
case tok::lessless:
case tok::lesslessequal:
case tok::minus:
case tok::minusequal:
case tok::minusminus:
case tok::percent:
case tok::percentequal:
case tok::period:
case tok::pipe:
case tok::pipeequal:
case tok::pipepipe:
case tok::plus:
case tok::plusequal:
case tok::plusplus:
case tok::question:
case tok::r_brace:
case tok::r_paren:
case tok::r_square:
case tok::semi:
case tok::slash:
case tok::slashequal:
case tok::star:
case tok::starequal:
return true;
default:
return false;
}
}
void UnwrappedLineParser::parseAccessSpecifier() {
FormatToken *AccessSpecifierCandidate = FormatTok;
nextToken();
// Understand Qt's slots.
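// e.g. `public slots:` or `public Q_SLOTS:` in a QObject subclass
// (illustrative examples).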
if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
nextToken();
// Otherwise, we don't know what it is, and we'd better keep the next token.
if (FormatTok->is(tok::colon)) {
nextToken();
addUnwrappedLine();
} else if (FormatTok->isNot(tok::coloncolon) &&
!isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
// Not a variable name nor namespace name.
addUnwrappedLine();
} else if (AccessSpecifierCandidate) {
// Consider the access specifier to be a C identifier.
AccessSpecifierCandidate->Tok.setKind(tok::identifier);
}
}
/// \brief Parses a requires, decides if it is a clause or an expression.
/// \pre The current token has to be the requires keyword.
/// \returns true if it parsed a clause.
bool UnwrappedLineParser::parseRequires() {
assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
auto RequiresToken = FormatTok;
// We try to guess if it is a requires clause or a requires expression. For
// that we first consume the keyword and check the next token.
nextToken();
switch (FormatTok->Tok.getKind()) {
case tok::l_brace:
// This can only be an expression, never a clause.
parseRequiresExpression(RequiresToken);
return false;
case tok::l_paren:
// Clauses and expressions can start with a paren; it's unclear what we have.
break;
default:
// All other tokens can only be a clause.
parseRequiresClause(RequiresToken);
return true;
}
// Looking forward we would have to decide if there are function declaration
// like arguments to the requires expression:
// requires (T t) {
// Or there is a constraint expression for the requires clause:
// requires (C<T> && ...
// But first let's look behind.
auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
if (!PreviousNonComment ||
PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
// If there is no token, or an expression left brace, we are a requires
// clause within a requires expression.
parseRequiresClause(RequiresToken);
return true;
}
switch (PreviousNonComment->Tok.getKind()) {
case tok::greater:
case tok::r_paren:
case tok::kw_noexcept:
case tok::kw_const:
// This is a requires clause.
parseRequiresClause(RequiresToken);
return true;
case tok::amp:
case tok::ampamp: {
// This can be either:
// if (... && requires (T t) ...)
// Or
// void member(...) && requires (C<T> ...
// We check the one token before that for a const:
// void member(...) const && requires (C<T> ...
auto PrevPrev = PreviousNonComment->getPreviousNonComment();
if (PrevPrev && PrevPrev->is(tok::kw_const)) {
parseRequiresClause(RequiresToken);
return true;
}
break;
}
default:
if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
// This is a requires clause.
parseRequiresClause(RequiresToken);
return true;
}
// It's an expression.
parseRequiresExpression(RequiresToken);
return false;
}
// Now we look forward and try to check if the paren content is a parameter
// list. The parameters can be cv-qualified and contain references or
// pointers.
// So basically we want to check for TYPE NAME, but TYPE can contain all kinds
// of stuff: typename, const, *, &, &&, ::, identifiers.
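// e.g. (illustration) `requires (const std::vector<T> &v, int n)` should be
// recognized as a parameter list and hence as a requires expression.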
unsigned StoredPosition = Tokens->getPosition();
FormatToken *NextToken = Tokens->getNextToken();
int Lookahead = 0;
auto PeekNext = [&Lookahead, &NextToken, this] {
++Lookahead;
NextToken = Tokens->getNextToken();
};
bool FoundType = false;
bool LastWasColonColon = false;
int OpenAngles = 0;
for (; Lookahead < 50; PeekNext()) {
switch (NextToken->Tok.getKind()) {
case tok::kw_volatile:
case tok::kw_const:
case tok::comma:
if (OpenAngles == 0) {
FormatTok = Tokens->setPosition(StoredPosition);
parseRequiresExpression(RequiresToken);
return false;
}
break;
case tok::eof:
// Break out of the loop.
Lookahead = 50;
break;
case tok::coloncolon:
LastWasColonColon = true;
break;
case tok::kw_decltype:
case tok::identifier:
if (FoundType && !LastWasColonColon && OpenAngles == 0) {
FormatTok = Tokens->setPosition(StoredPosition);
parseRequiresExpression(RequiresToken);
return false;
}
FoundType = true;
LastWasColonColon = false;
break;
case tok::less:
++OpenAngles;
break;
case tok::greater:
--OpenAngles;
break;
default:
if (NextToken->isTypeName(LangOpts)) {
FormatTok = Tokens->setPosition(StoredPosition);
parseRequiresExpression(RequiresToken);
return false;
}
break;
}
}
// This seems to be a complicated expression, just assume it's a clause.
FormatTok = Tokens->setPosition(StoredPosition);
parseRequiresClause(RequiresToken);
return true;
}
/// \brief Parses a requires clause.
/// \param RequiresToken The requires keyword token, which starts this clause.
/// \pre We need to be on the next token after the requires keyword.
/// \sa parseRequiresExpression
///
/// Returns when it has either finished parsing the clause or detected that
/// the clause is incorrect.
void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
assert(FormatTok->getPreviousNonComment() == RequiresToken);
assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
// If there is no previous token, we are within a requires expression,
// otherwise we will always have the template or function declaration in front
// of it.
bool InRequiresExpression =
!RequiresToken->Previous ||
RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
RequiresToken->setFinalizedType(InRequiresExpression
? TT_RequiresClauseInARequiresExpression
: TT_RequiresClause);
// NOTE: parseConstraintExpression is only ever called from this function.
// It could be inlined into here.
parseConstraintExpression();
if (!InRequiresExpression)
FormatTok->Previous->ClosesRequiresClause = true;
}
/// \brief Parses a requires expression.
/// \param RequiresToken The requires keyword token, which starts this clause.
/// \pre We need to be on the next token after the requires keyword.
/// \sa parseRequiresClause
///
/// Returns when it has either finished parsing the expression or detected
/// that the expression is incorrect.
void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
assert(FormatTok->getPreviousNonComment() == RequiresToken);
assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
RequiresToken->setFinalizedType(TT_RequiresExpression);
if (FormatTok->is(tok::l_paren)) {
FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
parseParens();
}
if (FormatTok->is(tok::l_brace)) {
FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
parseChildBlock();
}
}
/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns when the parsing is
/// complete or the expression is incorrect.
void UnwrappedLineParser::parseConstraintExpression() {
// The special handling for lambdas is needed since tryToParseLambda() eats a
// token and if a requires expression is the last part of a requires clause
// and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
// not set on the correct token. Thus we need to be aware if we even expect a
// lambda to be possible.
// template <typename T> requires requires { ... } [[nodiscard]] ...;
bool LambdaNextTimeAllowed = true;
// Within lambda declarations, it is permitted to put a requires clause after
// its template parameter list, which would place the requires clause right
// before the parentheses of the parameters of the lambda declaration. Thus,
// we track if we expect to see grouping parentheses at all.
// Without this check, `requires foo<T> (T t)` in the example below would be
// seen as the whole requires clause, accidentally eating the parameters of
// the lambda.
// [&]<typename T> requires foo<T> (T t) { ... };
bool TopLevelParensAllowed = true;
do {
bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
switch (FormatTok->Tok.getKind()) {
case tok::kw_requires: {
auto RequiresToken = FormatTok;
nextToken();
parseRequiresExpression(RequiresToken);
break;
}
case tok::l_paren:
if (!TopLevelParensAllowed)
return;
parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
TopLevelParensAllowed = false;
break;
case tok::l_square:
if (!LambdaThisTimeAllowed || !tryToParseLambda())
return;
break;
case tok::kw_const:
case tok::semi:
case tok::kw_class:
case tok::kw_struct:
case tok::kw_union:
return;
case tok::l_brace:
// Potential function body.
return;
case tok::ampamp:
case tok::pipepipe:
FormatTok->setFinalizedType(TT_BinaryOperator);
nextToken();
LambdaNextTimeAllowed = true;
TopLevelParensAllowed = true;
break;
case tok::comma:
case tok::comment:
LambdaNextTimeAllowed = LambdaThisTimeAllowed;
nextToken();
break;
case tok::kw_sizeof:
case tok::greater:
case tok::greaterequal:
case tok::greatergreater:
case tok::less:
case tok::lessequal:
case tok::lessless:
case tok::equalequal:
case tok::exclaim:
case tok::exclaimequal:
case tok::plus:
case tok::minus:
case tok::star:
case tok::slash:
LambdaNextTimeAllowed = true;
TopLevelParensAllowed = true;
// Just eat them.
nextToken();
break;
case tok::numeric_constant:
case tok::coloncolon:
case tok::kw_true:
case tok::kw_false:
TopLevelParensAllowed = false;
// Just eat them.
nextToken();
break;
case tok::kw_static_cast:
case tok::kw_const_cast:
case tok::kw_reinterpret_cast:
case tok::kw_dynamic_cast:
nextToken();
if (FormatTok->isNot(tok::less))
return;
nextToken();
parseBracedList(/*IsAngleBracket=*/true);
break;
default:
if (!FormatTok->Tok.getIdentifierInfo()) {
// Identifiers are part of the default case; we check for more than
// tok::identifier to handle builtin type traits.
return;
}
// We need to differentiate identifiers for a template deduction guide,
// variables, or function return types (the constraint expression has
// ended before that), and basically all other cases. But it's easier to
// check the other way around.
assert(FormatTok->Previous);
switch (FormatTok->Previous->Tok.getKind()) {
case tok::coloncolon: // Nested identifier.
case tok::ampamp: // Start of a function or variable for the
case tok::pipepipe: // constraint expression. (binary)
case tok::exclaim: // The same as above, but unary.
case tok::kw_requires: // Initial identifier of a requires clause.
case tok::equal: // Initial identifier of a concept declaration.
break;
default:
return;
}
// Read identifier with optional template declaration.
nextToken();
if (FormatTok->is(tok::less)) {
nextToken();
parseBracedList(/*IsAngleBracket=*/true);
}
TopLevelParensAllowed = false;
break;
}
} while (!eof());
}
bool UnwrappedLineParser::parseEnum() {
const FormatToken &InitialToken = *FormatTok;
// Won't be 'enum' for NS_ENUMs.
if (FormatTok->is(tok::kw_enum))
nextToken();
// In TypeScript, "enum" can also be used as a property name, e.g. in
// interface declarations. An "enum" keyword followed by a colon would be a
// syntax error, so we assume it is just an identifier.
if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
return false;
// In protobuf, "enum" can be used as a field name.
if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
return false;
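// Illustrative (assumed) inputs: TypeScript `interface I { enum: string; }`
// and a proto assignment like `enum = 1` both use "enum" as a plain name.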
if (IsCpp) {
// Eat up enum class ...
if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
nextToken();
while (FormatTok->is(tok::l_square))
if (!handleCppAttributes())
return false;
}
while (FormatTok->Tok.getIdentifierInfo() ||
FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
tok::greater, tok::comma, tok::question,
tok::l_square)) {
if (Style.isVerilog()) {
FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
nextToken();
// In Verilog the base type can have dimensions.
while (FormatTok->is(tok::l_square))
parseSquare();
} else {
nextToken();
}
// We can have macros or attributes in between 'enum' and the enum name.
if (FormatTok->is(tok::l_paren))
parseParens();
if (FormatTok->is(tok::identifier)) {
nextToken();
// If there are two identifiers in a row, this is likely an elaborate
// return type. In Java, this can be "implements", etc.
if (IsCpp && FormatTok->is(tok::identifier))
return false;
}
}
// Just a declaration or something is wrong.
if (FormatTok->isNot(tok::l_brace))
return true;
FormatTok->setFinalizedType(TT_EnumLBrace);
FormatTok->setBlockKind(BK_Block);
if (Style.Language == FormatStyle::LK_Java) {
// Java enums are different.
parseJavaEnumBody();
return true;
}
if (Style.Language == FormatStyle::LK_Proto) {
parseBlock(/*MustBeDeclaration=*/true);
return true;
}
if (!Style.AllowShortEnumsOnASingleLine &&
ShouldBreakBeforeBrace(Style, InitialToken)) {
addUnwrappedLine();
}
// Parse enum body.
nextToken();
if (!Style.AllowShortEnumsOnASingleLine) {
addUnwrappedLine();
Line->Level += 1;
}
bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
if (!Style.AllowShortEnumsOnASingleLine)
Line->Level -= 1;
if (HasError) {
if (FormatTok->is(tok::semi))
nextToken();
addUnwrappedLine();
}
setPreviousRBraceType(TT_EnumRBrace);
return true;
// There is no addUnwrappedLine() here so that we fall through to parsing a
// structural element afterwards. Thus, in "enum A {} n, m;",
// "} n, m;" will end up in one unwrapped line.
}
bool UnwrappedLineParser::parseStructLike() {
// parseRecord falls through and does not yet add an unwrapped line as a
// record declaration or definition can start a structural element.
parseRecord();
// This does not apply to Java, JavaScript and C#.
if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
Style.isCSharp()) {
if (FormatTok->is(tok::semi))
nextToken();
addUnwrappedLine();
return true;
}
return false;
}
namespace {
// A class used to set and restore the Token position when peeking
// ahead in the token source.
class ScopedTokenPosition {
unsigned StoredPosition;
FormatTokenSource *Tokens;
public:
ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
assert(Tokens && "Tokens expected to not be null");
StoredPosition = Tokens->getPosition();
}
~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
};
} // namespace
// Look ahead to see if we have [[; if it's not there, rewind to the
// original position.
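// e.g. `[[nodiscard]]` or `[[deprecated("msg")]]` before a declaration would
// typically qualify as simple attributes here (illustrative examples).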
bool UnwrappedLineParser::tryToParseSimpleAttribute() {
ScopedTokenPosition AutoPosition(Tokens);
FormatToken *Tok = Tokens->getNextToken();
// We already read the first [; check for the second.
if (Tok->isNot(tok::l_square))
return false;
// Double-check that the attribute is just something fairly simple.
while (Tok->isNot(tok::eof)) {
if (Tok->is(tok::r_square))
break;
Tok = Tokens->getNextToken();
}
if (Tok->is(tok::eof))
return false;
Tok = Tokens->getNextToken();
if (Tok->isNot(tok::r_square))
return false;
Tok = Tokens->getNextToken();
if (Tok->is(tok::semi))
return false;
return true;
}
void UnwrappedLineParser::parseJavaEnumBody() {
assert(FormatTok->is(tok::l_brace));
const FormatToken *OpeningBrace = FormatTok;
// Determine whether the enum is simple, i.e. does not have a semicolon or
// constants with class bodies. Simple enums can be formatted like braced
// lists, contracted to a single line, etc.
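// Illustrative (assumed) inputs: `enum Color { RED, GREEN, BLUE }` is
// simple, while `enum Op { PLUS { int f() { return 0; } }; }` is not,
// because of the constant's class body and the semicolon.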
unsigned StoredPosition = Tokens->getPosition();
bool IsSimple = true;
FormatToken *Tok = Tokens->getNextToken();
while (Tok->isNot(tok::eof)) {
if (Tok->is(tok::r_brace))
break;
if (Tok->isOneOf(tok::l_brace, tok::semi)) {
IsSimple = false;
break;
}
// FIXME: This will also mark enums with braces in the arguments to enum
// constants as "not simple". This is probably fine in practice, though.
Tok = Tokens->getNextToken();
}
FormatTok = Tokens->setPosition(StoredPosition);
if (IsSimple) {
nextToken();
parseBracedList();
addUnwrappedLine();
return;
}
// Parse the body of a more complex enum.
// First add a line for everything up to the "{".
nextToken();
addUnwrappedLine();
++Line->Level;
// Parse the enum constants.
while (!eof()) {
if (FormatTok->is(tok::l_brace)) {
// Parse the constant's class body.
parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
/*MunchSemi=*/false);
} else if (FormatTok->is(tok::l_paren)) {
parseParens();
} else if (FormatTok->is(tok::comma)) {
nextToken();
addUnwrappedLine();
} else if (FormatTok->is(tok::semi)) {
nextToken();
addUnwrappedLine();
break;
} else if (FormatTok->is(tok::r_brace)) {
addUnwrappedLine();
break;
} else {
nextToken();
}
}
// Parse the class body after the enum's ";" if any.
parseLevel(OpeningBrace);
nextToken();
--Line->Level;
addUnwrappedLine();
}
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
const FormatToken &InitialToken = *FormatTok;
nextToken();
const FormatToken *ClassName = nullptr;
bool IsDerived = false;
auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
};
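// Heuristic: an all-caps identifier is assumed to be a macro, so in
// `class EXPORT_API(x) Foo {};` the parens after `EXPORT_API` are consumed
// as macro arguments rather than mistaken for part of the class name
// (illustrative example, not from the original source).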
// JavaScript/TypeScript supports anonymous classes like:
// a = class extends foo { }
bool JSPastExtendsOrImplements = false;
// The actual identifier can be a nested name specifier, and in macros
// it is often token-pasted.
// An [[attribute]] can be before the identifier.
while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
tok::kw_alignas, tok::l_square) ||
FormatTok->isAttribute() ||
((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
FormatTok->isOneOf(tok::period, tok::comma))) {
if (Style.isJavaScript() &&
FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
JSPastExtendsOrImplements = true;
// JavaScript/TypeScript supports inline object types in
// extends/implements positions:
// class Foo implements {bar: number} { }
nextToken();
if (FormatTok->is(tok::l_brace)) {
tryToParseBracedList();
continue;
}
}
if (FormatTok->is(tok::l_square) && handleCppAttributes())
continue;
const auto *Previous = FormatTok;
nextToken();
switch (FormatTok->Tok.getKind()) {
case tok::l_paren:
// We can have macros in between 'class' and the class name.
if (!IsNonMacroIdentifier(Previous) ||
// e.g. `struct macro(a) S { int i; };`
Previous->Previous == &InitialToken) {
parseParens();
}
break;
case tok::coloncolon:
case tok::hashhash:
break;
default:
if (!JSPastExtendsOrImplements && !ClassName &&
Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) {
ClassName = Previous;
}
}
}
auto IsListInitialization = [&] {
if (!ClassName || IsDerived)
return false;
assert(FormatTok->is(tok::l_brace));
const auto *Prev = FormatTok->getPreviousNonComment();
assert(Prev);
return Prev != ClassName && Prev->is(tok::identifier) &&
Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
};
if (FormatTok->isOneOf(tok::colon, tok::less)) {
int AngleNestingLevel = 0;
do {
if (FormatTok->is(tok::less))
++AngleNestingLevel;
else if (FormatTok->is(tok::greater))
--AngleNestingLevel;
if (AngleNestingLevel == 0) {
if (FormatTok->is(tok::colon)) {
IsDerived = true;
} else if (FormatTok->is(tok::identifier) &&
FormatTok->Previous->is(tok::coloncolon)) {
ClassName = FormatTok;
} else if (FormatTok->is(tok::l_paren) &&
IsNonMacroIdentifier(FormatTok->Previous)) {
break;
}
}
if (FormatTok->is(tok::l_brace)) {
if (AngleNestingLevel == 0 && IsListInitialization())
return;
calculateBraceTypes(/*ExpectClassBody=*/true);
if (!tryToParseBracedList())
break;
}
if (FormatTok->is(tok::l_square)) {
FormatToken *Previous = FormatTok->Previous;
if (!Previous || (Previous->isNot(tok::r_paren) &&
!Previous->isTypeOrIdentifier(LangOpts))) {
// Don't try parsing a lambda if we had a closing parenthesis before;
// it was probably a pointer to an array: int (*)[].
if (!tryToParseLambda())
continue;
} else {
parseSquare();
continue;
}
}
if (FormatTok->is(tok::semi))
return;
if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
addUnwrappedLine();
nextToken();
parseCSharpGenericTypeConstraint();
break;
}
nextToken();
} while (!eof());
}
auto GetBraceTypes =
[](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
switch (RecordTok.Tok.getKind()) {
case tok::kw_class:
return {TT_ClassLBrace, TT_ClassRBrace};
case tok::kw_struct:
return {TT_StructLBrace, TT_StructRBrace};
case tok::kw_union:
return {TT_UnionLBrace, TT_UnionRBrace};
default:
// Useful for e.g. interface.
return {TT_RecordLBrace, TT_RecordRBrace};
}
};
if (FormatTok->is(tok::l_brace)) {
if (IsListInitialization())
return;
auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
FormatTok->setFinalizedType(OpenBraceType);
if (ParseAsExpr) {
parseChildBlock();
} else {
if (ShouldBreakBeforeBrace(Style, InitialToken))
addUnwrappedLine();
unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
}
setPreviousRBraceType(ClosingBraceType);
}
// There is no addUnwrappedLine() here so that we fall through to parsing a
// structural element afterwards. Thus, in "class A {} n, m;",
// "} n, m;" will end up in one unwrapped line.
}
void UnwrappedLineParser::parseObjCMethod() {
assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
"'(' or identifier expected.");
do {
if (FormatTok->is(tok::semi)) {
nextToken();
addUnwrappedLine();
return;
} else if (FormatTok->is(tok::l_brace)) {
if (Style.BraceWrapping.AfterFunction)
addUnwrappedLine();
parseBlock();
addUnwrappedLine();
return;
} else {
nextToken();
}
} while (!eof());
}
void UnwrappedLineParser::parseObjCProtocolList() {
assert(FormatTok->is(tok::less) && "'<' expected.");
do {
nextToken();
// Early exit in case someone forgot a close angle.
if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
FormatTok->isObjCAtKeyword(tok::objc_end)) {
return;
}
} while (!eof() && FormatTok->isNot(tok::greater));
nextToken(); // Skip '>'.
}
void UnwrappedLineParser::parseObjCUntilAtEnd() {
do {
if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
nextToken();
addUnwrappedLine();
break;
}
if (FormatTok->is(tok::l_brace)) {
parseBlock();
// In ObjC interfaces, nothing should follow the "}".
addUnwrappedLine();
} else if (FormatTok->is(tok::r_brace)) {
// Ignore stray "}". parseStructuralElement doesn't consume them.
nextToken();
addUnwrappedLine();
} else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
nextToken();
parseObjCMethod();
} else {
parseStructuralElement();
}
} while (!eof());
}
void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
nextToken();
nextToken(); // interface name
// @interface can be followed by a lightweight generic
// specialization list, then either a base class or a category.
if (FormatTok->is(tok::less))
parseObjCLightweightGenerics();
if (FormatTok->is(tok::colon)) {
nextToken();
nextToken(); // base class name
// The base class can also have lightweight generics applied to it.
if (FormatTok->is(tok::less))
parseObjCLightweightGenerics();
} else if (FormatTok->is(tok::l_paren)) {
// Skip category, if present.
parseParens();
}
if (FormatTok->is(tok::less))
parseObjCProtocolList();
if (FormatTok->is(tok::l_brace)) {
if (Style.BraceWrapping.AfterObjCDeclaration)
addUnwrappedLine();
parseBlock(/*MustBeDeclaration=*/true);
}
// With instance variables, this puts '}' on its own line. Without instance
// variables, this ends the @interface line.
addUnwrappedLine();
parseObjCUntilAtEnd();
}
void UnwrappedLineParser::parseObjCLightweightGenerics() {
assert(FormatTok->is(tok::less));
// Unlike protocol lists, generic parameterizations support
// nested angles:
//
// @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
// NSObject <NSCopying, NSSecureCoding>
//
// so we need to count how many open angles we have left.
unsigned NumOpenAngles = 1;
do {
nextToken();
// Early exit in case someone forgot a close angle.
if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
FormatTok->isObjCAtKeyword(tok::objc_end)) {
break;
}
if (FormatTok->is(tok::less)) {
++NumOpenAngles;
} else if (FormatTok->is(tok::greater)) {
assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
--NumOpenAngles;
}
} while (!eof() && NumOpenAngles != 0);
nextToken(); // Skip '>'.
}
// Returns true for the declaration/definition form of @protocol,
// false for the expression form.
bool UnwrappedLineParser::parseObjCProtocol() {
assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
nextToken();
if (FormatTok->is(tok::l_paren)) {
// The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
return false;
}
// The definition/declaration form,
// @protocol Foo
// - (int)someMethod;
// @end
nextToken(); // protocol name
if (FormatTok->is(tok::less))
parseObjCProtocolList();
// Check for protocol declaration.
if (FormatTok->is(tok::semi)) {
nextToken();
addUnwrappedLine();
return true;
}
addUnwrappedLine();
parseObjCUntilAtEnd();
return true;
}
void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
bool IsImport = FormatTok->is(Keywords.kw_import);
assert(IsImport || FormatTok->is(tok::kw_export));
nextToken();
// Consume the "default" in "export default class/function".
if (FormatTok->is(tok::kw_default))
nextToken();
// Consume "async function", "function" and "default function", so that these
// get parsed as free-standing JS functions, i.e. do not require a trailing
// semicolon.
if (FormatTok->is(Keywords.kw_async))
nextToken();
if (FormatTok->is(Keywords.kw_function)) {
nextToken();
return;
}
// For imports, `export *`, `export {...}`, consume the rest of the line up
// to the terminating `;`. For everything else, just return and continue
// parsing the structural element, i.e. the declaration or expression for
// `export default`.
if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
!FormatTok->isStringLiteral() &&
!(FormatTok->is(Keywords.kw_type) &&
Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
return;
}
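// Illustrative (assumed) inputs: `import {a, b} from 'm';` is consumed below
// up to the `;`, while `export default class C {}` returns above and is
// parsed as a structural element.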
while (!eof()) {
if (FormatTok->is(tok::semi))
return;
if (Line->Tokens.empty()) {
// Common issue: Automatic Semicolon Insertion wrapped the line, so the
// import statement should terminate.
return;
}
if (FormatTok->is(tok::l_brace)) {
FormatTok->setBlockKind(BK_Block);
nextToken();
parseBracedList();
} else {
nextToken();
}
}
}
void UnwrappedLineParser::parseStatementMacro() {
nextToken();
if (FormatTok->is(tok::l_paren))
parseParens();
if (FormatTok->is(tok::semi))
nextToken();
addUnwrappedLine();
}
void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
// Consume things like a::`b.c[d:e] or a::*.
while (true) {
if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
tok::coloncolon, tok::hash) ||
Keywords.isVerilogIdentifier(*FormatTok)) {
nextToken();
} else if (FormatTok->is(tok::l_square)) {
parseSquare();
} else {
break;
}
}
}
void UnwrappedLineParser::parseVerilogSensitivityList() {
if (FormatTok->isNot(tok::at))
return;
nextToken();
// A block event expression has 2 at signs.
if (FormatTok->is(tok::at))
nextToken();
switch (FormatTok->Tok.getKind()) {
case tok::star:
nextToken();
break;
case tok::l_paren:
parseParens();
break;
default:
parseVerilogHierarchyIdentifier();
break;
}
}
unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
unsigned AddLevels = 0;
if (FormatTok->is(Keywords.kw_clocking)) {
nextToken();
if (Keywords.isVerilogIdentifier(*FormatTok))
nextToken();
parseVerilogSensitivityList();
if (FormatTok->is(tok::semi))
nextToken();
} else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
Keywords.kw_casez, Keywords.kw_randcase,
Keywords.kw_randsequence)) {
if (Style.IndentCaseLabels)
AddLevels++;
nextToken();
if (FormatTok->is(tok::l_paren)) {
FormatTok->setFinalizedType(TT_ConditionLParen);
parseParens();
}
if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
nextToken();
// The case header has no semicolon.
} else {
// "module" etc.
nextToken();
// All the words like the name of the module, specifiers like "automatic",
// and the width of the function return type.
while (true) {
if (FormatTok->is(tok::l_square)) {
auto Prev = FormatTok->getPreviousNonComment();
if (Prev && Keywords.isVerilogIdentifier(*Prev))
Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
parseSquare();
} else if (Keywords.isVerilogIdentifier(*FormatTok) ||
FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
nextToken();
} else {
break;
}
}
auto NewLine = [this]() {
addUnwrappedLine();
Line->IsContinuation = true;
};
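// An illustrative (assumed) input:
//   module m import p::*; #(parameter W = 8) (input logic clk);
// Each of the clauses handled below starts a continuation line.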
// package imports
while (FormatTok->is(Keywords.kw_import)) {
NewLine();
nextToken();
parseVerilogHierarchyIdentifier();
if (FormatTok->is(tok::semi))
nextToken();
}
// parameters and ports
if (FormatTok->is(Keywords.kw_verilogHash)) {
NewLine();
nextToken();
if (FormatTok->is(tok::l_paren)) {
FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
parseParens();
}
}
if (FormatTok->is(tok::l_paren)) {
NewLine();
FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
parseParens();
}
// extends and implements
if (FormatTok->is(Keywords.kw_extends)) {
NewLine();
nextToken();
parseVerilogHierarchyIdentifier();
if (FormatTok->is(tok::l_paren))
parseParens();
}
if (FormatTok->is(Keywords.kw_implements)) {
NewLine();
do {
nextToken();
parseVerilogHierarchyIdentifier();
} while (FormatTok->is(tok::comma));
}
// Coverage event for cover groups.
if (FormatTok->is(tok::at)) {
NewLine();
parseVerilogSensitivityList();
}
if (FormatTok->is(tok::semi))
nextToken(/*LevelDifference=*/1);
addUnwrappedLine();
}
return AddLevels;
}
void UnwrappedLineParser::parseVerilogTable() {
assert(FormatTok->is(Keywords.kw_table));
nextToken(/*LevelDifference=*/1);
addUnwrappedLine();
auto InitialLevel = Line->Level++;
while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
FormatToken *Tok = FormatTok;
nextToken();
if (Tok->is(tok::semi))
addUnwrappedLine();
else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
Tok->setFinalizedType(TT_VerilogTableItem);
}
Line->Level = InitialLevel;
nextToken(/*LevelDifference=*/-1);
addUnwrappedLine();
}
void UnwrappedLineParser::parseVerilogCaseLabel() {
// The label will get unindented in AnnotatingParser. If there are no leading
// spaces, indent the rest here so that things inside the block will be
// indented relative to things outside. We don't use parseLabel because we
// don't know whether this colon is a label or a ternary expression at this
// point.
auto OrigLevel = Line->Level;
auto FirstLine = CurrentLines->size();
if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
++Line->Level;
else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
--Line->Level;
parseStructuralElement();
// Restore the indentation in both the new line and the line that has the
// label.
if (CurrentLines->size() > FirstLine)
(*CurrentLines)[FirstLine].Level = OrigLevel;
Line->Level = OrigLevel;
}
bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
for (const auto &N : Line.Tokens) {
if (N.Tok->MacroCtx)
return true;
for (const UnwrappedLine &Child : N.Children)
if (containsExpansion(Child))
return true;
}
return false;
}
void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
if (Line->Tokens.empty())
return;
LLVM_DEBUG({
if (!parsingPPDirective()) {
llvm::dbgs() << "Adding unwrapped line:\n";
printDebugInfo(*Line);
}
});
// If this line closes a block when in Whitesmiths mode, remember that
// information so that the level can be decreased after the line is added.
// This has to happen after the addition of the line since the line itself
// needs to be indented.
bool ClosesWhitesmithsBlock =
Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
// If the current line was expanded from a macro call, we use it to
// reconstruct an unwrapped line from the structure of the expanded unwrapped
// line and the unexpanded token stream.
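// Sketch (assumed example): for a call `ID(x, y)` the expanded tokens are
// what got parsed, while the reconstructed line restores the original
// `ID(x, y)` spelling so the formatted output keeps the macro call.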
if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
if (!Reconstruct)
Reconstruct.emplace(Line->Level, Unexpanded);
Reconstruct->addLine(*Line);
// While the reconstructed unexpanded lines are stored in the normal
// flow of lines, the expanded lines are stored on the side to be analyzed
// in an extra step.
CurrentExpandedLines.push_back(std::move(*Line));
if (Reconstruct->finished()) {
UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
assert(!Reconstructed.Tokens.empty() &&
"Reconstructed must at least contain the macro identifier.");
assert(!parsingPPDirective());
LLVM_DEBUG({
llvm::dbgs() << "Adding unexpanded line:\n";
printDebugInfo(Reconstructed);
});
ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
Lines.push_back(std::move(Reconstructed));
CurrentExpandedLines.clear();
Reconstruct.reset();
}
} else {
// At the top level we only get here when no unexpansion is going on, or
// when conditional formatting led to unfinished macro reconstructions.
assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
CurrentLines->push_back(std::move(*Line));
}
Line->Tokens.clear();
Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
Line->FirstStartColumn = 0;
Line->IsContinuation = false;
Line->SeenDecltypeAuto = false;
if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
--Line->Level;
if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
CurrentLines->append(
std::make_move_iterator(PreprocessorDirectives.begin()),
std::make_move_iterator(PreprocessorDirectives.end()));
PreprocessorDirectives.clear();
}
// Disconnect the current token from the last token on the previous line.
FormatTok->Previous = nullptr;
}
bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
FormatTok.NewlinesBefore > 0;
}
// Checks if \p FormatTok is a line comment that continues the line comment
// section on \p Line.
static bool
continuesLineCommentSection(const FormatToken &FormatTok,
const UnwrappedLine &Line,
const llvm::Regex &CommentPragmasRegex) {
if (Line.Tokens.empty())
return false;
StringRef IndentContent = FormatTok.TokenText;
if (FormatTok.TokenText.starts_with("//") ||
FormatTok.TokenText.starts_with("/*")) {
IndentContent = FormatTok.TokenText.substr(2);
}
if (CommentPragmasRegex.match(IndentContent))
return false;
// If Line starts with a line comment, then FormatTok continues the comment
// section if its original column is greater or equal to the original start
// column of the line.
//
// Define the min column token of a line as follows: if a line ends in '{' or
// contains a '{' followed by a line comment, then the min column token is
// that '{'. Otherwise, the min column token of the line is the first token of
// the line.
//
// If Line starts with a token other than a line comment, then FormatTok
// continues the comment section if its original column is greater than the
// original start column of the min column token of the line.
//
// For example, the second line comment continues the first in these cases:
//
// // first line
// // second line
//
// and:
//
// // first line
// // second line
//
// and:
//
// int i; // first line
// // second line
//
// and:
//
// do { // first line
// // second line
// int i;
// } while (true);
//
// and:
//
// enum {
// a, // first line
// // second line
// b
// };
//
// The second line comment doesn't continue the first in these cases:
//
// // first line
// // second line
//
// and:
//
// int i; // first line
// // second line
//
// and:
//
// do { // first line
// // second line
// int i;
// } while (true);
//
// and:
//
// enum {
// a, // first line
// // second line
// };
const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
// Scan for '{//'. If found, use the column of '{' as a min column for line
// comment section continuation.
const FormatToken *PreviousToken = nullptr;
for (const UnwrappedLineNode &Node : Line.Tokens) {
if (PreviousToken && PreviousToken->is(tok::l_brace) &&
isLineComment(*Node.Tok)) {
MinColumnToken = PreviousToken;
break;
}
PreviousToken = Node.Tok;
// Grab the last newline preceding a token in this unwrapped line.
if (Node.Tok->NewlinesBefore > 0)
MinColumnToken = Node.Tok;
}
if (PreviousToken && PreviousToken->is(tok::l_brace))
MinColumnToken = PreviousToken;
return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
MinColumnToken);
}
void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
bool JustComments = Line->Tokens.empty();
for (FormatToken *Tok : CommentsBeforeNextToken) {
// Line comments that belong to the same line comment section are put on the
// same line since later we might want to reflow content between them.
// Additional fine-grained breaking of line comment sections is controlled
// by the class BreakableLineCommentSection in case it is desirable to keep
// several line comment sections in the same unwrapped line.
//
// FIXME: Consider putting separate line comment sections as children to the
// unwrapped line instead.
Tok->ContinuesLineCommentSection =
continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
addUnwrappedLine();
pushToken(Tok);
}
if (NewlineBeforeNext && JustComments)
addUnwrappedLine();
CommentsBeforeNextToken.clear();
}
void UnwrappedLineParser::nextToken(int LevelDifference) {
if (eof())
return;
flushComments(isOnNewLine(*FormatTok));
pushToken(FormatTok);
FormatToken *Previous = FormatTok;
if (!Style.isJavaScript())
readToken(LevelDifference);
else
readTokenWithJavaScriptASI();
FormatTok->Previous = Previous;
if (Style.isVerilog()) {
// Blocks in Verilog can have `begin` and `end` instead of braces. For
// keywords like `begin`, we can't treat them the same as left braces
// because some contexts require one of them. For example structs use
// braces and if blocks use keywords, and a left brace can occur in an if
// statement, but it is not a block. For keywords like `end`, we simply
// treat them the same as right braces.
if (Keywords.isVerilogEnd(*FormatTok))
FormatTok->Tok.setKind(tok::r_brace);
}
}
void UnwrappedLineParser::distributeComments(
const SmallVectorImpl<FormatToken *> &Comments,
const FormatToken *NextTok) {
// Whether or not a line comment token continues a line is controlled by
// the method continuesLineCommentSection, with the following caveat:
//
// Define a trail of Comments to be a nonempty proper postfix of Comments such
// that each comment line from the trail is aligned with the next token, if
// the next token exists. If a trail exists, the beginning of the maximal
// trail is marked as a start of a new comment section.
//
// For example in this code:
//
// int a; // line about a
// // line 1 about b
// // line 2 about b
// int b;
//
// the two lines about b form a maximal trail, so there are two sections, the
// first one consisting of the single comment "// line about a" and the
// second one consisting of the next two comments.
if (Comments.empty())
return;
bool ShouldPushCommentsInCurrentLine = true;
bool HasTrailAlignedWithNextToken = false;
unsigned StartOfTrailAlignedWithNextToken = 0;
if (NextTok) {
// We are skipping the first element intentionally.
for (unsigned i = Comments.size() - 1; i > 0; --i) {
if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
HasTrailAlignedWithNextToken = true;
StartOfTrailAlignedWithNextToken = i;
}
}
}
for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
FormatToken *FormatTok = Comments[i];
if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
FormatTok->ContinuesLineCommentSection = false;
} else {
FormatTok->ContinuesLineCommentSection =
continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
}
if (!FormatTok->ContinuesLineCommentSection &&
(isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
ShouldPushCommentsInCurrentLine = false;
}
if (ShouldPushCommentsInCurrentLine)
pushToken(FormatTok);
else
CommentsBeforeNextToken.push_back(FormatTok);
}
}
void UnwrappedLineParser::readToken(int LevelDifference) {
SmallVector<FormatToken *, 1> Comments;
bool PreviousWasComment = false;
bool FirstNonCommentOnLine = false;
do {
FormatTok = Tokens->getNextToken();
assert(FormatTok);
while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd,
TT_ConflictAlternative)) {
if (FormatTok->is(TT_ConflictStart))
conditionalCompilationStart(/*Unreachable=*/false);
else if (FormatTok->is(TT_ConflictAlternative))
conditionalCompilationAlternative();
else if (FormatTok->is(TT_ConflictEnd))
conditionalCompilationEnd();
FormatTok = Tokens->getNextToken();
FormatTok->MustBreakBefore = true;
FormatTok->MustBreakBeforeFinalized = true;
}
auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
const FormatToken &Tok,
bool PreviousWasComment) {
auto IsFirstOnLine = [](const FormatToken &Tok) {
return Tok.HasUnescapedNewline || Tok.IsFirst;
};
// Consider preprocessor directives preceded by block comments as first
// on line.
if (PreviousWasComment)
return FirstNonCommentOnLine || IsFirstOnLine(Tok);
return IsFirstOnLine(Tok);
};
FirstNonCommentOnLine = IsFirstNonCommentOnLine(
FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
PreviousWasComment = FormatTok->is(tok::comment);
while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
(!Style.isVerilog() ||
Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
FirstNonCommentOnLine) {
distributeComments(Comments, FormatTok);
Comments.clear();
// If there is an unfinished unwrapped line, we flush the preprocessor
// directives only after that unwrapped line has been finished.
bool SwitchToPreprocessorLines = !Line->Tokens.empty();
ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
assert((LevelDifference >= 0 ||
static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
"LevelDifference makes Line->Level negative");
Line->Level += LevelDifference;
// Comments stored before the preprocessor directive need to be output
// before the preprocessor directive, at the same level as the
// preprocessor directive, as we consider them to apply to the directive.
if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
PPBranchLevel > 0) {
Line->Level += PPBranchLevel;
}
assert(Line->Level >= Line->UnbracedBodyLevel);
Line->Level -= Line->UnbracedBodyLevel;
flushComments(isOnNewLine(*FormatTok));
parsePPDirective();
PreviousWasComment = FormatTok->is(tok::comment);
FirstNonCommentOnLine = IsFirstNonCommentOnLine(
FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
}
if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
!Line->InPPDirective) {
continue;
}
if (FormatTok->is(tok::identifier) &&
Macros.defined(FormatTok->TokenText) &&
// FIXME: Allow expanding macros in preprocessor directives.
!Line->InPPDirective) {
FormatToken *ID = FormatTok;
unsigned Position = Tokens->getPosition();
// To correctly parse the code, we need to replace the tokens of the macro
// call with its expansion.
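// e.g. (illustration) with a configured macro `CLASS(X)` expanding to
// `class X {`, the call `CLASS(Foo)` is re-parsed as `class Foo {` so that
// brace matching still works.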
auto PreCall = std::move(Line);
Line.reset(new UnwrappedLine);
bool OldInExpansion = InExpansion;
InExpansion = true;
// We parse the macro call into a new line.
auto Args = parseMacroCall();
InExpansion = OldInExpansion;
assert(Line->Tokens.front().Tok == ID);
// And remember the unexpanded macro call tokens.
auto UnexpandedLine = std::move(Line);
// Reset to the old line.
Line = std::move(PreCall);
LLVM_DEBUG({
llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
if (Args) {
llvm::dbgs() << "(";
for (const auto &Arg : Args.value())
for (const auto &T : Arg)
llvm::dbgs() << T->TokenText << " ";
llvm::dbgs() << ")";
}
llvm::dbgs() << "\n";
});
if (Macros.objectLike(ID->TokenText) && Args &&
!Macros.hasArity(ID->TokenText, Args->size())) {
// The macro is either
// - object-like, but we got arguments, or
// - overloaded to be both object-like and function-like, but none of
// the function-like arities match the number of arguments.
// Thus, expand as object-like macro.
LLVM_DEBUG(llvm::dbgs()
<< "Macro \"" << ID->TokenText
<< "\" not overloaded for arity " << Args->size()
<< "or not function-like, using object-like overload.");
Args.reset();
UnexpandedLine->Tokens.resize(1);
Tokens->setPosition(Position);
nextToken();
assert(!Args && Macros.objectLike(ID->TokenText));
}
if ((!Args && Macros.objectLike(ID->TokenText)) ||
(Args && Macros.hasArity(ID->TokenText, Args->size()))) {
// Next, we insert the expanded tokens in the token stream at the
// current position, and continue parsing.
Unexpanded[ID] = std::move(UnexpandedLine);
SmallVector<FormatToken *, 8> Expansion =
Macros.expand(ID, std::move(Args));
if (!Expansion.empty())
FormatTok = Tokens->insertTokens(Expansion);
LLVM_DEBUG({
llvm::dbgs() << "Expanded: ";
for (const auto &T : Expansion)
llvm::dbgs() << T->TokenText << " ";
llvm::dbgs() << "\n";
});
} else {
LLVM_DEBUG({
llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
<< "\", because it was used ";
if (Args)
llvm::dbgs() << "with " << Args->size();
else
llvm::dbgs() << "without";
llvm::dbgs() << " arguments, which doesn't match any definition.\n";
});
Tokens->setPosition(Position);
FormatTok = ID;
}
}
if (FormatTok->isNot(tok::comment)) {
distributeComments(Comments, FormatTok);
Comments.clear();
return;
}
Comments.push_back(FormatTok);
} while (!eof());
distributeComments(Comments, nullptr);
Comments.clear();
}
namespace {
template <typename Iterator>
void pushTokens(Iterator Begin, Iterator End,
llvm::SmallVectorImpl<FormatToken *> &Into) {
for (auto I = Begin; I != End; ++I) {
Into.push_back(I->Tok);
for (const auto &Child : I->Children)
pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
}
}
} // namespace
std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
UnwrappedLineParser::parseMacroCall() {
std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
assert(Line->Tokens.empty());
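// e.g. (illustration) for `M(a, (b, c))` this returns two arguments,
// {a} and {(, b, comma, c, )}; commas inside nested parens don't split
// arguments.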
nextToken();
if (FormatTok->isNot(tok::l_paren))
return Args;
unsigned Position = Tokens->getPosition();
FormatToken *Tok = FormatTok;
nextToken();
Args.emplace();
auto ArgStart = std::prev(Line->Tokens.end());
int Parens = 0;
do {
switch (FormatTok->Tok.getKind()) {
case tok::l_paren:
++Parens;
nextToken();
break;
case tok::r_paren: {
if (Parens > 0) {
--Parens;
nextToken();
break;
}
Args->push_back({});
pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
nextToken();
return Args;
}
case tok::comma: {
if (Parens > 0) {
nextToken();
break;
}
Args->push_back({});
pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
nextToken();
ArgStart = std::prev(Line->Tokens.end());
break;
}
default:
nextToken();
break;
}
} while (!eof());
Line->Tokens.resize(1);
Tokens->setPosition(Position);
FormatTok = Tok;
return {};
}
void UnwrappedLineParser::pushToken(FormatToken *Tok) {
Line->Tokens.push_back(UnwrappedLineNode(Tok));
if (MustBreakBeforeNextToken) {
Line->Tokens.back().Tok->MustBreakBefore = true;
Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
MustBreakBeforeNextToken = false;
}
}
} // end namespace format
} // end namespace clang
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp b/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp
index edb8b79a2220..f56ca398cda8 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp
@@ -1,20903 +1,20916 @@
//===--- SemaExpr.cpp - Semantic Analysis for Expressions -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis for expressions.
//
//===----------------------------------------------------------------------===//
#include "CheckExprLifetime.h"
#include "TreeTransform.h"
#include "UsedDeclVisitor.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ExprOpenMP.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/ParentMapContext.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TypeTraits.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/AnalysisBasedWarnings.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/DelayedDiagnostic.h"
#include "clang/Sema/Designator.h"
#include "clang/Sema/EnterExpressionEvaluationContext.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaCUDA.h"
#include "clang/Sema/SemaFixItUtils.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/SemaObjC.h"
#include "clang/Sema/SemaOpenMP.h"
#include "clang/Sema/SemaPseudoObject.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/TypeSize.h"
#include <optional>
using namespace clang;
using namespace sema;
bool Sema::CanUseDecl(NamedDecl *D, bool TreatUnavailableAsInvalid) {
// See if this is an auto-typed variable whose initializer we are parsing.
if (ParsingInitForAutoVars.count(D))
return false;
// See if this is a deleted function.
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->isDeleted())
return false;
// If the function has a deduced return type, and we can't deduce it,
// then we can't use it either.
if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() &&
DeduceReturnType(FD, SourceLocation(), /*Diagnose*/ false))
return false;
// See if this is an aligned allocation/deallocation function that is
// unavailable.
if (TreatUnavailableAsInvalid &&
isUnavailableAlignedAllocationFunction(*FD))
return false;
}
// See if this function is unavailable.
if (TreatUnavailableAsInvalid && D->getAvailability() == AR_Unavailable &&
cast<Decl>(CurContext)->getAvailability() != AR_Unavailable)
return false;
if (isa<UnresolvedUsingIfExistsDecl>(D))
return false;
return true;
}
static void DiagnoseUnusedOfDecl(Sema &S, NamedDecl *D, SourceLocation Loc) {
// Warn if this is used but marked unused.
if (const auto *A = D->getAttr<UnusedAttr>()) {
// [[maybe_unused]] should not diagnose uses, but __attribute__((unused))
// should diagnose them.
if (A->getSemanticSpelling() != UnusedAttr::CXX11_maybe_unused &&
A->getSemanticSpelling() != UnusedAttr::C23_maybe_unused) {
const Decl *DC = cast_or_null<Decl>(S.ObjC().getCurObjCLexicalContext());
if (DC && !DC->hasAttr<UnusedAttr>())
S.Diag(Loc, diag::warn_used_but_marked_unused) << D;
}
}
}
void Sema::NoteDeletedFunction(FunctionDecl *Decl) {
assert(Decl && Decl->isDeleted());
if (Decl->isDefaulted()) {
// If the method was explicitly defaulted, point at that declaration.
if (!Decl->isImplicit())
Diag(Decl->getLocation(), diag::note_implicitly_deleted);
// Try to diagnose why this special member function was implicitly
// deleted. This might fail if that reason no longer applies.
DiagnoseDeletedDefaultedFunction(Decl);
return;
}
auto *Ctor = dyn_cast<CXXConstructorDecl>(Decl);
if (Ctor && Ctor->isInheritingConstructor())
return NoteDeletedInheritingConstructor(Ctor);
Diag(Decl->getLocation(), diag::note_availability_specified_here)
<< Decl << 1;
}
/// Determine whether a FunctionDecl was ever declared with an
/// explicit storage class.
static bool hasAnyExplicitStorageClass(const FunctionDecl *D) {
for (auto *I : D->redecls()) {
if (I->getStorageClass() != SC_None)
return true;
}
return false;
}
/// Check whether we're in an extern inline function and referring to a
/// variable or function with internal linkage (C11 6.7.4p3).
///
/// This is only a warning because we used to silently accept this code, but
/// in many cases it will not behave correctly. This is not enabled in C++ mode
/// because the restriction language is a bit weaker (C++11 [basic.def.odr]p6)
/// and so while there may still be user mistakes, most of the time we can't
/// prove that there are errors.
static void diagnoseUseOfInternalDeclInInlineFunction(Sema &S,
const NamedDecl *D,
SourceLocation Loc) {
// This is disabled under C++; there are too many ways for this to fire in
// contexts where the warning is a false positive, or where it is technically
// correct but benign.
if (S.getLangOpts().CPlusPlus)
return;
// Check if this is an inlined function or method.
FunctionDecl *Current = S.getCurFunctionDecl();
if (!Current)
return;
if (!Current->isInlined())
return;
if (!Current->isExternallyVisible())
return;
// Check if the decl has internal linkage.
if (D->getFormalLinkage() != Linkage::Internal)
return;
// Downgrade from ExtWarn to Extension if
// (1) the supposedly external inline function is in the main file,
// and probably won't be included anywhere else.
// (2) the thing we're referencing is a pure function.
// (3) the thing we're referencing is another inline function.
// This last can give us false negatives, but it's better than warning on
// wrappers for simple C library functions.
const FunctionDecl *UsedFn = dyn_cast<FunctionDecl>(D);
bool DowngradeWarning = S.getSourceManager().isInMainFile(Loc);
if (!DowngradeWarning && UsedFn)
DowngradeWarning = UsedFn->isInlined() || UsedFn->hasAttr<ConstAttr>();
S.Diag(Loc, DowngradeWarning ? diag::ext_internal_in_extern_inline_quiet
: diag::ext_internal_in_extern_inline)
<< /*IsVar=*/!UsedFn << D;
S.MaybeSuggestAddingStaticToDecl(Current);
S.Diag(D->getCanonicalDecl()->getLocation(), diag::note_entity_declared_at)
<< D;
}
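// Illustrative example (assumed, not from the original source) of the C11
// 6.7.4p3 situation diagnosed above:
//
//   static int helper(void) { return 0; }
//   inline int wrapper(void) { return helper(); }  // extern inline function
//                                                  // references an internal-
//                                                  // linkage function
//
// The fix-it suggested by MaybeSuggestAddingStaticToDecl below is to declare
// 'wrapper' static as well.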
void Sema::MaybeSuggestAddingStaticToDecl(const FunctionDecl *Cur) {
const FunctionDecl *First = Cur->getFirstDecl();
// Suggest "static" on the function, if possible.
if (!hasAnyExplicitStorageClass(First)) {
SourceLocation DeclBegin = First->getSourceRange().getBegin();
Diag(DeclBegin, diag::note_convert_inline_to_static)
<< Cur << FixItHint::CreateInsertion(DeclBegin, "static ");
}
}
bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
const ObjCInterfaceDecl *UnknownObjCClass,
bool ObjCPropertyAccess,
bool AvoidPartialAvailabilityChecks,
ObjCInterfaceDecl *ClassReceiver,
bool SkipTrailingRequiresClause) {
SourceLocation Loc = Locs.front();
if (getLangOpts().CPlusPlus && isa<FunctionDecl>(D)) {
// If there were any diagnostics suppressed by template argument deduction,
// emit them now.
auto Pos = SuppressedDiagnostics.find(D->getCanonicalDecl());
if (Pos != SuppressedDiagnostics.end()) {
for (const PartialDiagnosticAt &Suppressed : Pos->second)
Diag(Suppressed.first, Suppressed.second);
// Clear out the list of suppressed diagnostics, so that we don't emit
// them again for this specialization. However, we don't obsolete this
// entry from the table, because we want to avoid ever emitting these
// diagnostics again.
Pos->second.clear();
}
// C++ [basic.start.main]p3:
// The function 'main' shall not be used within a program.
if (cast<FunctionDecl>(D)->isMain())
Diag(Loc, diag::ext_main_used);
diagnoseUnavailableAlignedAllocation(*cast<FunctionDecl>(D), Loc);
}
// See if this is an auto-typed variable whose initializer we are parsing.
if (ParsingInitForAutoVars.count(D)) {
if (isa<BindingDecl>(D)) {
Diag(Loc, diag::err_binding_cannot_appear_in_own_initializer)
<< D->getDeclName();
} else {
Diag(Loc, diag::err_auto_variable_cannot_appear_in_own_initializer)
<< D->getDeclName() << cast<VarDecl>(D)->getType();
}
return true;
}
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
// See if this is a deleted function.
if (FD->isDeleted()) {
auto *Ctor = dyn_cast<CXXConstructorDecl>(FD);
if (Ctor && Ctor->isInheritingConstructor())
Diag(Loc, diag::err_deleted_inherited_ctor_use)
<< Ctor->getParent()
<< Ctor->getInheritedConstructor().getConstructor()->getParent();
else {
StringLiteral *Msg = FD->getDeletedMessage();
Diag(Loc, diag::err_deleted_function_use)
<< (Msg != nullptr) << (Msg ? Msg->getString() : StringRef());
}
NoteDeletedFunction(FD);
return true;
}
// [expr.prim.id]p4
// A program that refers explicitly or implicitly to a function with a
// trailing requires-clause whose constraint-expression is not satisfied,
// other than to declare it, is ill-formed. [...]
//
// See if this is a function with constraints that need to be satisfied.
// Check this before deducing the return type, as it might instantiate the
// definition.
if (!SkipTrailingRequiresClause && FD->getTrailingRequiresClause()) {
ConstraintSatisfaction Satisfaction;
if (CheckFunctionConstraints(FD, Satisfaction, Loc,
/*ForOverloadResolution*/ true))
// A diagnostic will have already been generated (non-constant
// constraint expression, for example)
return true;
if (!Satisfaction.IsSatisfied) {
Diag(Loc,
diag::err_reference_to_function_with_unsatisfied_constraints)
<< D;
DiagnoseUnsatisfiedConstraint(Satisfaction);
return true;
}
}
// If the function has a deduced return type, and we can't deduce it,
// then we can't use it either.
if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() &&
DeduceReturnType(FD, Loc))
return true;
if (getLangOpts().CUDA && !CUDA().CheckCall(Loc, FD))
return true;
}
if (auto *MD = dyn_cast<CXXMethodDecl>(D)) {
// Lambdas are only default-constructible or assignable in C++2a onwards.
if (MD->getParent()->isLambda() &&
((isa<CXXConstructorDecl>(MD) &&
cast<CXXConstructorDecl>(MD)->isDefaultConstructor()) ||
MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator())) {
Diag(Loc, diag::warn_cxx17_compat_lambda_def_ctor_assign)
<< !isa<CXXConstructorDecl>(MD);
}
}
auto getReferencedObjCProp = [](const NamedDecl *D) ->
const ObjCPropertyDecl * {
if (const auto *MD = dyn_cast<ObjCMethodDecl>(D))
return MD->findPropertyDecl();
return nullptr;
};
if (const ObjCPropertyDecl *ObjCPDecl = getReferencedObjCProp(D)) {
if (diagnoseArgIndependentDiagnoseIfAttrs(ObjCPDecl, Loc))
return true;
} else if (diagnoseArgIndependentDiagnoseIfAttrs(D, Loc)) {
return true;
}
// [OpenMP 4.0], 2.15 declare reduction Directive, Restrictions
// Only the variables omp_in and omp_out are allowed in the combiner.
// Only the variables omp_priv and omp_orig are allowed in the
// initializer-clause.
auto *DRD = dyn_cast<OMPDeclareReductionDecl>(CurContext);
if (LangOpts.OpenMP && DRD && !CurContext->containsDecl(D) &&
isa<VarDecl>(D)) {
Diag(Loc, diag::err_omp_wrong_var_in_declare_reduction)
<< getCurFunction()->HasOMPDeclareReductionCombiner;
Diag(D->getLocation(), diag::note_entity_declared_at) << D;
return true;
}
// [OpenMP 5.0], 2.19.7.3. declare mapper Directive, Restrictions
// List-items in map clauses on this construct may only refer to the declared
// variable var and entities that could be referenced by a procedure defined
// at the same location.
// [OpenMP 5.2] Also allow iterator declared variables.
if (LangOpts.OpenMP && isa<VarDecl>(D) &&
!OpenMP().isOpenMPDeclareMapperVarDeclAllowed(cast<VarDecl>(D))) {
Diag(Loc, diag::err_omp_declare_mapper_wrong_var)
<< OpenMP().getOpenMPDeclareMapperVarName();
Diag(D->getLocation(), diag::note_entity_declared_at) << D;
return true;
}
if (const auto *EmptyD = dyn_cast<UnresolvedUsingIfExistsDecl>(D)) {
Diag(Loc, diag::err_use_of_empty_using_if_exists);
Diag(EmptyD->getLocation(), diag::note_empty_using_if_exists_here);
return true;
}
DiagnoseAvailabilityOfDecl(D, Locs, UnknownObjCClass, ObjCPropertyAccess,
AvoidPartialAvailabilityChecks, ClassReceiver);
DiagnoseUnusedOfDecl(*this, D, Loc);
diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc);
if (D->hasAttr<AvailableOnlyInDefaultEvalMethodAttr>()) {
if (getLangOpts().getFPEvalMethod() !=
LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine &&
PP.getLastFPEvalPragmaLocation().isValid() &&
PP.getCurrentFPEvalMethod() != getLangOpts().getFPEvalMethod())
Diag(D->getLocation(),
diag::err_type_available_only_in_default_eval_method)
<< D->getName();
}
if (auto *VD = dyn_cast<ValueDecl>(D))
checkTypeSupport(VD->getType(), Loc, VD);
if (LangOpts.SYCLIsDevice ||
(LangOpts.OpenMP && LangOpts.OpenMPIsTargetDevice)) {
if (!Context.getTargetInfo().isTLSSupported())
if (const auto *VD = dyn_cast<VarDecl>(D))
if (VD->getTLSKind() != VarDecl::TLS_None)
targetDiag(*Locs.begin(), diag::err_thread_unsupported);
}
if (isa<ParmVarDecl>(D) && isa<RequiresExprBodyDecl>(D->getDeclContext()) &&
!isUnevaluatedContext()) {
// C++ [expr.prim.req.nested] p3
// A local parameter shall only appear as an unevaluated operand
// (Clause 8) within the constraint-expression.
Diag(Loc, diag::err_requires_expr_parameter_referenced_in_evaluated_context)
<< D;
Diag(D->getLocation(), diag::note_entity_declared_at) << D;
return true;
}
return false;
}
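// Illustrative examples (assumed, not from the original source) of two of
// the checks above:
//
//   auto x = x;             // error: variable 'x' declared with deduced type
//                           // cannot appear in its own initializer
//
//   template <class T> void f() requires false;
//   void g() { f<int>(); }  // error: invalid reference to function 'f':
//                           // constraints not satisfied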
void Sema::DiagnoseSentinelCalls(const NamedDecl *D, SourceLocation Loc,
ArrayRef<Expr *> Args) {
const SentinelAttr *Attr = D->getAttr<SentinelAttr>();
if (!Attr)
return;
// The number of formal parameters of the declaration.
unsigned NumFormalParams;
// The kind of declaration. This is also an index into a %select in
// the diagnostic.
enum { CK_Function, CK_Method, CK_Block } CalleeKind;
if (const auto *MD = dyn_cast<ObjCMethodDecl>(D)) {
NumFormalParams = MD->param_size();
CalleeKind = CK_Method;
} else if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
NumFormalParams = FD->param_size();
CalleeKind = CK_Function;
} else if (const auto *VD = dyn_cast<VarDecl>(D)) {
QualType Ty = VD->getType();
const FunctionType *Fn = nullptr;
if (const auto *PtrTy = Ty->getAs<PointerType>()) {
Fn = PtrTy->getPointeeType()->getAs<FunctionType>();
if (!Fn)
return;
CalleeKind = CK_Function;
} else if (const auto *PtrTy = Ty->getAs<BlockPointerType>()) {
Fn = PtrTy->getPointeeType()->castAs<FunctionType>();
CalleeKind = CK_Block;
} else {
return;
}
if (const auto *proto = dyn_cast<FunctionProtoType>(Fn))
NumFormalParams = proto->getNumParams();
else
NumFormalParams = 0;
} else {
return;
}
// "NullPos" is the number of formal parameters at the end which
// effectively count as part of the variadic arguments. This is
// useful if you would prefer to not have *any* formal parameters,
// but the language forces you to have at least one.
unsigned NullPos = Attr->getNullPos();
assert((NullPos == 0 || NullPos == 1) && "invalid null position on sentinel");
NumFormalParams = (NullPos > NumFormalParams ? 0 : NumFormalParams - NullPos);
// The number of arguments which should follow the sentinel.
unsigned NumArgsAfterSentinel = Attr->getSentinel();
// If there aren't enough arguments for all the formal parameters,
// the sentinel, and the args after the sentinel, complain.
if (Args.size() < NumFormalParams + NumArgsAfterSentinel + 1) {
Diag(Loc, diag::warn_not_enough_argument) << D->getDeclName();
Diag(D->getLocation(), diag::note_sentinel_here) << int(CalleeKind);
return;
}
// Otherwise, find the sentinel expression.
const Expr *SentinelExpr = Args[Args.size() - NumArgsAfterSentinel - 1];
if (!SentinelExpr)
return;
if (SentinelExpr->isValueDependent())
return;
if (Context.isSentinelNullExpr(SentinelExpr))
return;
// Pick a reasonable string to insert. Optimistically use 'nil', 'nullptr',
// or 'NULL' if those are actually defined in the context. Only use
// 'nil' for ObjC methods, where it's much more likely that the
// variadic arguments form a list of object pointers.
SourceLocation MissingNilLoc = getLocForEndOfToken(SentinelExpr->getEndLoc());
std::string NullValue;
if (CalleeKind == CK_Method && PP.isMacroDefined("nil"))
NullValue = "nil";
else if (getLangOpts().CPlusPlus11)
NullValue = "nullptr";
else if (PP.isMacroDefined("NULL"))
NullValue = "NULL";
else
NullValue = "(void*) 0";
if (MissingNilLoc.isInvalid())
Diag(Loc, diag::warn_missing_sentinel) << int(CalleeKind);
else
Diag(MissingNilLoc, diag::warn_missing_sentinel)
<< int(CalleeKind)
<< FixItHint::CreateInsertion(MissingNilLoc, ", " + NullValue);
Diag(D->getLocation(), diag::note_sentinel_here)
<< int(CalleeKind) << Attr->getRange();
}
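// Illustrative example (assumed, not from the original source):
//
//   void execlike(const char *path, ...) __attribute__((sentinel));
//   execlike("/bin/ls", "-l");        // warns: missing sentinel; the fix-it
//                                     // appends ", NULL" (", nullptr" in
//                                     // C++11, or ", nil" for ObjC methods)
//   execlike("/bin/ls", "-l", NULL);  // OK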
SourceRange Sema::getExprRange(Expr *E) const {
return E ? E->getSourceRange() : SourceRange();
}
//===----------------------------------------------------------------------===//
// Standard Promotions and Conversions
//===----------------------------------------------------------------------===//
/// DefaultFunctionArrayConversion (C99 6.3.2.1p3, C99 6.3.2.1p4).
ExprResult Sema::DefaultFunctionArrayConversion(Expr *E, bool Diagnose) {
// Handle any placeholder expressions which made it here.
if (E->hasPlaceholderType()) {
ExprResult result = CheckPlaceholderExpr(E);
if (result.isInvalid()) return ExprError();
E = result.get();
}
QualType Ty = E->getType();
assert(!Ty.isNull() && "DefaultFunctionArrayConversion - missing type");
if (Ty->isFunctionType()) {
if (auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts()))
if (auto *FD = dyn_cast<FunctionDecl>(DRE->getDecl()))
if (!checkAddressOfFunctionIsAvailable(FD, Diagnose, E->getExprLoc()))
return ExprError();
E = ImpCastExprToType(E, Context.getPointerType(Ty),
CK_FunctionToPointerDecay).get();
} else if (Ty->isArrayType()) {
// In C90 mode, arrays only promote to pointers if the array expression is
// an lvalue. The relevant legalese is C90 6.2.2.1p3: "an lvalue that has
// type 'array of type' is converted to an expression that has type 'pointer
// to type'...". In C99 this was changed to: C99 6.3.2.1p3: "an expression
// that has type 'array of type' ...". The relevant change is "an lvalue"
// (C90) to "an expression" (C99).
//
// C++ 4.2p1:
// An lvalue or rvalue of type "array of N T" or "array of unknown bound of
// T" can be converted to an rvalue of type "pointer to T".
//
if (getLangOpts().C99 || getLangOpts().CPlusPlus || E->isLValue()) {
ExprResult Res = ImpCastExprToType(E, Context.getArrayDecayedType(Ty),
CK_ArrayToPointerDecay);
if (Res.isInvalid())
return ExprError();
E = Res.get();
}
}
return E;
}
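// Illustrative example (assumed, not from the original source) of the C90
// lvalue nuance discussed above:
//
//   struct S { int a[4]; };
//   struct S f(void);
//   void g(void) {
//     int *p = f().a;  // decays in C99 and C++; in C90 'f().a' is not an
//                      // lvalue, so no array-to-pointer conversion happens
//   }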
static void CheckForNullPointerDereference(Sema &S, Expr *E) {
// Check to see if we are dereferencing a null pointer. If so,
// and if not volatile-qualified, this is undefined behavior that the
// optimizer will delete, so warn about it. People sometimes try to use this
// to get a deterministic trap and are surprised by clang's behavior. This
// only handles the pattern "*null", which is a very syntactic check.
const auto *UO = dyn_cast<UnaryOperator>(E->IgnoreParenCasts());
if (UO && UO->getOpcode() == UO_Deref &&
UO->getSubExpr()->getType()->isPointerType()) {
const LangAS AS =
UO->getSubExpr()->getType()->getPointeeType().getAddressSpace();
if ((!isTargetAddressSpace(AS) ||
(isTargetAddressSpace(AS) && toTargetAddressSpace(AS) == 0)) &&
UO->getSubExpr()->IgnoreParenCasts()->isNullPointerConstant(
S.Context, Expr::NPC_ValueDependentIsNotNull) &&
!UO->getType().isVolatileQualified()) {
S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO,
S.PDiag(diag::warn_indirection_through_null)
<< UO->getSubExpr()->getSourceRange());
S.DiagRuntimeBehavior(UO->getOperatorLoc(), UO,
S.PDiag(diag::note_indirection_through_null));
}
}
}
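// Illustrative example (assumed, not from the original source):
//
//   *(int *)0 = 42;           // warns: indirection of non-volatile null
//                             // pointer; the optimizer may delete it instead
//                             // of producing the trap the user expected
//   *(volatile int *)0 = 42;  // no warning: volatile accesses are preserved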
static void DiagnoseDirectIsaAccess(Sema &S, const ObjCIvarRefExpr *OIRE,
SourceLocation AssignLoc,
const Expr* RHS) {
const ObjCIvarDecl *IV = OIRE->getDecl();
if (!IV)
return;
DeclarationName MemberName = IV->getDeclName();
IdentifierInfo *Member = MemberName.getAsIdentifierInfo();
if (!Member || !Member->isStr("isa"))
return;
const Expr *Base = OIRE->getBase();
QualType BaseType = Base->getType();
if (OIRE->isArrow())
BaseType = BaseType->getPointeeType();
if (const ObjCObjectType *OTy = BaseType->getAs<ObjCObjectType>())
if (ObjCInterfaceDecl *IDecl = OTy->getInterface()) {
ObjCInterfaceDecl *ClassDeclared = nullptr;
ObjCIvarDecl *IV = IDecl->lookupInstanceVariable(Member, ClassDeclared);
if (!ClassDeclared->getSuperClass()
&& (*ClassDeclared->ivar_begin()) == IV) {
if (RHS) {
NamedDecl *ObjectSetClass =
S.LookupSingleName(S.TUScope,
&S.Context.Idents.get("object_setClass"),
SourceLocation(), S.LookupOrdinaryName);
if (ObjectSetClass) {
SourceLocation RHSLocEnd = S.getLocForEndOfToken(RHS->getEndLoc());
S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_assign)
<< FixItHint::CreateInsertion(OIRE->getBeginLoc(),
"object_setClass(")
<< FixItHint::CreateReplacement(
SourceRange(OIRE->getOpLoc(), AssignLoc), ",")
<< FixItHint::CreateInsertion(RHSLocEnd, ")");
}
else
S.Diag(OIRE->getLocation(), diag::warn_objc_isa_assign);
} else {
NamedDecl *ObjectGetClass =
S.LookupSingleName(S.TUScope,
&S.Context.Idents.get("object_getClass"),
SourceLocation(), S.LookupOrdinaryName);
if (ObjectGetClass)
S.Diag(OIRE->getExprLoc(), diag::warn_objc_isa_use)
<< FixItHint::CreateInsertion(OIRE->getBeginLoc(),
"object_getClass(")
<< FixItHint::CreateReplacement(
SourceRange(OIRE->getOpLoc(), OIRE->getEndLoc()), ")");
else
S.Diag(OIRE->getLocation(), diag::warn_objc_isa_use);
}
S.Diag(IV->getLocation(), diag::note_ivar_decl);
}
}
}
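// Illustrative example (assumed, not from the original source), Objective-C:
//
//   obj->isa = newClass;   // warn_objc_isa_assign; the fix-it rewrites it
//                          // to: object_setClass(obj, newClass)
//   Class c = obj->isa;    // warn_objc_isa_use; the fix-it rewrites it
//                          // to: Class c = object_getClass(obj)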
ExprResult Sema::DefaultLvalueConversion(Expr *E) {
// Handle any placeholder expressions which made it here.
if (E->hasPlaceholderType()) {
ExprResult result = CheckPlaceholderExpr(E);
if (result.isInvalid()) return ExprError();
E = result.get();
}
// C++ [conv.lval]p1:
// A glvalue of a non-function, non-array type T can be
// converted to a prvalue.
if (!E->isGLValue()) return E;
QualType T = E->getType();
assert(!T.isNull() && "r-value conversion on typeless expression?");
// lvalue-to-rvalue conversion cannot be applied to types that decay to
// pointers (i.e. function or array types).
if (T->canDecayToPointerType())
return E;
// We don't want to throw lvalue-to-rvalue casts on top of
// expressions of certain types in C++.
if (getLangOpts().CPlusPlus) {
if (T == Context.OverloadTy || T->isRecordType() ||
(T->isDependentType() && !T->isAnyPointerType() &&
!T->isMemberPointerType()))
return E;
}
// The C standard is actually really unclear on this point, and
// DR106 tells us what the result should be but not why. It's
// generally best to say that void types just don't undergo
// lvalue-to-rvalue at all. Note that expressions of unqualified
// 'void' type are never l-values, but qualified void can be.
if (T->isVoidType())
return E;
// OpenCL usually rejects direct accesses to values of 'half' type.
if (getLangOpts().OpenCL &&
!getOpenCLOptions().isAvailableOption("cl_khr_fp16", getLangOpts()) &&
T->isHalfType()) {
Diag(E->getExprLoc(), diag::err_opencl_half_load_store)
<< 0 << T;
return ExprError();
}
CheckForNullPointerDereference(*this, E);
if (const ObjCIsaExpr *OISA = dyn_cast<ObjCIsaExpr>(E->IgnoreParenCasts())) {
NamedDecl *ObjectGetClass = LookupSingleName(TUScope,
&Context.Idents.get("object_getClass"),
SourceLocation(), LookupOrdinaryName);
if (ObjectGetClass)
Diag(E->getExprLoc(), diag::warn_objc_isa_use)
<< FixItHint::CreateInsertion(OISA->getBeginLoc(), "object_getClass(")
<< FixItHint::CreateReplacement(
SourceRange(OISA->getOpLoc(), OISA->getIsaMemberLoc()), ")");
else
Diag(E->getExprLoc(), diag::warn_objc_isa_use);
}
else if (const ObjCIvarRefExpr *OIRE =
dyn_cast<ObjCIvarRefExpr>(E->IgnoreParenCasts()))
DiagnoseDirectIsaAccess(*this, OIRE, SourceLocation(), /* Expr*/nullptr);
// C++ [conv.lval]p1:
// [...] If T is a non-class type, the type of the prvalue is the
// cv-unqualified version of T. Otherwise, the type of the
// rvalue is T.
//
// C99 6.3.2.1p2:
// If the lvalue has qualified type, the value has the unqualified
// version of the type of the lvalue; otherwise, the value has the
// type of the lvalue.
if (T.hasQualifiers())
T = T.getUnqualifiedType();
// Under the MS ABI, lock down the inheritance model now.
if (T->isMemberPointerType() &&
Context.getTargetInfo().getCXXABI().isMicrosoft())
(void)isCompleteType(E->getExprLoc(), T);
ExprResult Res = CheckLValueToRValueConversionOperand(E);
if (Res.isInvalid())
return Res;
E = Res.get();
// Loading a __weak object implicitly retains the value, so we need a cleanup to
// balance that.
if (E->getType().getObjCLifetime() == Qualifiers::OCL_Weak)
Cleanup.setExprNeedsCleanups(true);
if (E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct)
Cleanup.setExprNeedsCleanups(true);
// C++ [conv.lval]p3:
// If T is cv std::nullptr_t, the result is a null pointer constant.
CastKind CK = T->isNullPtrType() ? CK_NullToPointer : CK_LValueToRValue;
Res = ImplicitCastExpr::Create(Context, T, CK, E, nullptr, VK_PRValue,
CurFPFeatureOverrides());
// C11 6.3.2.1p2:
// ... if the lvalue has atomic type, the value has the non-atomic version
// of the type of the lvalue ...
if (const AtomicType *Atomic = T->getAs<AtomicType>()) {
T = Atomic->getValueType().getUnqualifiedType();
Res = ImplicitCastExpr::Create(Context, T, CK_AtomicToNonAtomic, Res.get(),
nullptr, VK_PRValue, FPOptionsOverride());
}
return Res;
}
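// Illustrative example (assumed, not from the original source):
//
//   _Atomic int ai;
//   int x = ai + 1;  // the load yields a non-atomic 'int' prvalue
//                    // (CK_AtomicToNonAtomic stacked on CK_LValueToRValue)
//
//   const int ci = 0;
//   int y = ci;      // the loaded prvalue has the unqualified type 'int'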
ExprResult Sema::DefaultFunctionArrayLvalueConversion(Expr *E, bool Diagnose) {
ExprResult Res = DefaultFunctionArrayConversion(E, Diagnose);
if (Res.isInvalid())
return ExprError();
Res = DefaultLvalueConversion(Res.get());
if (Res.isInvalid())
return ExprError();
return Res;
}
ExprResult Sema::CallExprUnaryConversions(Expr *E) {
QualType Ty = E->getType();
ExprResult Res = E;
// Only do implicit cast for a function type, but not for a pointer
// to function type.
if (Ty->isFunctionType()) {
Res = ImpCastExprToType(E, Context.getPointerType(Ty),
CK_FunctionToPointerDecay);
if (Res.isInvalid())
return ExprError();
}
Res = DefaultLvalueConversion(Res.get());
if (Res.isInvalid())
return ExprError();
return Res.get();
}
/// UsualUnaryConversions - Performs various conversions that are common to most
/// operators (C99 6.3). The conversions of array and function types are
/// sometimes suppressed. For example, the array->pointer conversion doesn't
/// apply if the array is an argument to the sizeof or address (&) operators.
/// In these instances, this routine should *not* be called.
ExprResult Sema::UsualUnaryConversions(Expr *E) {
// First, convert to an r-value.
ExprResult Res = DefaultFunctionArrayLvalueConversion(E);
if (Res.isInvalid())
return ExprError();
E = Res.get();
QualType Ty = E->getType();
assert(!Ty.isNull() && "UsualUnaryConversions - missing type");
LangOptions::FPEvalMethodKind EvalMethod = CurFPFeatures.getFPEvalMethod();
if (EvalMethod != LangOptions::FEM_Source && Ty->isFloatingType() &&
(getLangOpts().getFPEvalMethod() !=
LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine ||
PP.getLastFPEvalPragmaLocation().isValid())) {
switch (EvalMethod) {
default:
llvm_unreachable("Unrecognized float evaluation method");
break;
case LangOptions::FEM_UnsetOnCommandLine:
llvm_unreachable("Float evaluation method should be set by now");
break;
case LangOptions::FEM_Double:
if (Context.getFloatingTypeOrder(Context.DoubleTy, Ty) > 0)
// Widen the expression to double.
return Ty->isComplexType()
? ImpCastExprToType(E,
Context.getComplexType(Context.DoubleTy),
CK_FloatingComplexCast)
: ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast);
break;
case LangOptions::FEM_Extended:
if (Context.getFloatingTypeOrder(Context.LongDoubleTy, Ty) > 0)
// Widen the expression to long double.
return Ty->isComplexType()
? ImpCastExprToType(
E, Context.getComplexType(Context.LongDoubleTy),
CK_FloatingComplexCast)
: ImpCastExprToType(E, Context.LongDoubleTy,
CK_FloatingCast);
break;
}
}
// Half FP has to be promoted to float unless it is natively supported
if (Ty->isHalfType() && !getLangOpts().NativeHalfType)
return ImpCastExprToType(Res.get(), Context.FloatTy, CK_FloatingCast);
// Try to perform integral promotions if the object has a theoretically
// promotable type.
if (Ty->isIntegralOrUnscopedEnumerationType()) {
// C99 6.3.1.1p2:
//
// The following may be used in an expression wherever an int or
// unsigned int may be used:
// - an object or expression with an integer type whose integer
// conversion rank is less than or equal to the rank of int
// and unsigned int.
// - A bit-field of type _Bool, int, signed int, or unsigned int.
//
// If an int can represent all values of the original type, the
// value is converted to an int; otherwise, it is converted to an
// unsigned int. These are called the integer promotions. All
// other types are unchanged by the integer promotions.
QualType PTy = Context.isPromotableBitField(E);
if (!PTy.isNull()) {
E = ImpCastExprToType(E, PTy, CK_IntegralCast).get();
return E;
}
if (Context.isPromotableIntegerType(Ty)) {
QualType PT = Context.getPromotedIntegerType(Ty);
E = ImpCastExprToType(E, PT, CK_IntegralCast).get();
return E;
}
}
return E;
}
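// Illustrative example (assumed, not from the original source) of the integer
// promotions performed above:
//
//   short a, b;
//   int r = a + b;   // both operands promote to 'int' before the addition
//
//   unsigned char c;
//   int s = c << 1;  // 'c' promotes to 'int', which can represent every
//                    // 'unsigned char' value (C99 6.3.1.1p2)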
/// DefaultArgumentPromotion (C99 6.5.2.2p6). Used for function calls that
/// do not have a prototype. Arguments that have type float or __fp16
/// are promoted to double. All other argument types are converted by
/// UsualUnaryConversions().
ExprResult Sema::DefaultArgumentPromotion(Expr *E) {
QualType Ty = E->getType();
assert(!Ty.isNull() && "DefaultArgumentPromotion - missing type");
ExprResult Res = UsualUnaryConversions(E);
if (Res.isInvalid())
return ExprError();
E = Res.get();
// If this is a 'float' or '__fp16' (CVR qualified or typedef)
// promote to double.
// Note that default argument promotion applies only to float (and
// half/fp16); it does not apply to _Float16.
const BuiltinType *BTy = Ty->getAs<BuiltinType>();
if (BTy && (BTy->getKind() == BuiltinType::Half ||
BTy->getKind() == BuiltinType::Float)) {
if (getLangOpts().OpenCL &&
!getOpenCLOptions().isAvailableOption("cl_khr_fp64", getLangOpts())) {
if (BTy->getKind() == BuiltinType::Half) {
E = ImpCastExprToType(E, Context.FloatTy, CK_FloatingCast).get();
}
} else {
E = ImpCastExprToType(E, Context.DoubleTy, CK_FloatingCast).get();
}
}
if (BTy &&
getLangOpts().getExtendIntArgs() ==
LangOptions::ExtendArgsKind::ExtendTo64 &&
Context.getTargetInfo().supportsExtendIntArgs() && Ty->isIntegerType() &&
Context.getTypeSizeInChars(BTy) <
Context.getTypeSizeInChars(Context.LongLongTy)) {
E = (Ty->isUnsignedIntegerType())
? ImpCastExprToType(E, Context.UnsignedLongLongTy, CK_IntegralCast)
.get()
: ImpCastExprToType(E, Context.LongLongTy, CK_IntegralCast).get();
assert(8 == Context.getTypeSizeInChars(Context.LongLongTy).getQuantity() &&
"Unexpected typesize for LongLongTy");
}
// C++ performs lvalue-to-rvalue conversion as a default argument
// promotion, even on class types, but note:
// C++11 [conv.lval]p2:
// When an lvalue-to-rvalue conversion occurs in an unevaluated
// operand or a subexpression thereof the value contained in the
// referenced object is not accessed. Otherwise, if the glvalue
// has a class type, the conversion copy-initializes a temporary
// of type T from the glvalue and the result of the conversion
// is a prvalue for the temporary.
// FIXME: add some way to gate this entire thing for correctness in
// potentially evaluated contexts.
if (getLangOpts().CPlusPlus && E->isGLValue() && !isUnevaluatedContext()) {
ExprResult Temp = PerformCopyInitialization(
InitializedEntity::InitializeTemporary(E->getType()),
E->getExprLoc(), E);
if (Temp.isInvalid())
return ExprError();
E = Temp.get();
}
return E;
}
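// Illustrative example (assumed, not from the original source):
//
//   void old_style();   // no prototype (pre-C23)
//   void call(float f, short s) {
//     old_style(f, s);  // 'f' promotes to 'double', 's' to 'int'
//   }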
Sema::VarArgKind Sema::isValidVarArgType(const QualType &Ty) {
if (Ty->isIncompleteType()) {
// C++11 [expr.call]p7:
// After these conversions, if the argument does not have arithmetic,
// enumeration, pointer, pointer to member, or class type, the program
// is ill-formed.
//
// Since we've already performed array-to-pointer and function-to-pointer
// decay, the only such type in C++ is cv void. This also handles
// initializer lists as variadic arguments.
if (Ty->isVoidType())
return VAK_Invalid;
if (Ty->isObjCObjectType())
return VAK_Invalid;
return VAK_Valid;
}
if (Ty.isDestructedType() == QualType::DK_nontrivial_c_struct)
return VAK_Invalid;
if (Context.getTargetInfo().getTriple().isWasm() &&
Ty.isWebAssemblyReferenceType()) {
return VAK_Invalid;
}
if (Ty.isCXX98PODType(Context))
return VAK_Valid;
// C++11 [expr.call]p7:
// Passing a potentially-evaluated argument of class type (Clause 9)
// having a non-trivial copy constructor, a non-trivial move constructor,
// or a non-trivial destructor, with no corresponding parameter,
// is conditionally-supported with implementation-defined semantics.
if (getLangOpts().CPlusPlus11 && !Ty->isDependentType())
if (CXXRecordDecl *Record = Ty->getAsCXXRecordDecl())
if (!Record->hasNonTrivialCopyConstructor() &&
!Record->hasNonTrivialMoveConstructor() &&
!Record->hasNonTrivialDestructor())
return VAK_ValidInCXX11;
if (getLangOpts().ObjCAutoRefCount && Ty->isObjCLifetimeType())
return VAK_Valid;
if (Ty->isObjCObjectType())
return VAK_Invalid;
if (getLangOpts().MSVCCompat)
return VAK_MSVCUndefined;
// FIXME: In C++11, these cases are conditionally-supported, meaning we're
// permitted to reject them. We should consider doing so.
return VAK_Undefined;
}
void Sema::checkVariadicArgument(const Expr *E, VariadicCallType CT) {
// Don't allow one to pass an Objective-C interface to a vararg.
const QualType &Ty = E->getType();
VarArgKind VAK = isValidVarArgType(Ty);
// Complain about passing non-POD types through varargs.
switch (VAK) {
case VAK_ValidInCXX11:
DiagRuntimeBehavior(
E->getBeginLoc(), nullptr,
PDiag(diag::warn_cxx98_compat_pass_non_pod_arg_to_vararg) << Ty << CT);
[[fallthrough]];
case VAK_Valid:
if (Ty->isRecordType()) {
// This is unlikely to be what the user intended. If the class has a
// 'c_str' member function, the user probably meant to call that.
DiagRuntimeBehavior(E->getBeginLoc(), nullptr,
PDiag(diag::warn_pass_class_arg_to_vararg)
<< Ty << CT << hasCStrMethod(E) << ".c_str()");
}
break;
case VAK_Undefined:
case VAK_MSVCUndefined:
DiagRuntimeBehavior(E->getBeginLoc(), nullptr,
PDiag(diag::warn_cannot_pass_non_pod_arg_to_vararg)
<< getLangOpts().CPlusPlus11 << Ty << CT);
break;
case VAK_Invalid:
if (Ty.isDestructedType() == QualType::DK_nontrivial_c_struct)
Diag(E->getBeginLoc(),
diag::err_cannot_pass_non_trivial_c_struct_to_vararg)
<< Ty << CT;
else if (Ty->isObjCObjectType())
DiagRuntimeBehavior(E->getBeginLoc(), nullptr,
PDiag(diag::err_cannot_pass_objc_interface_to_vararg)
<< Ty << CT);
else
Diag(E->getBeginLoc(), diag::err_cannot_pass_to_vararg)
<< isa<InitListExpr>(E) << Ty << CT;
break;
}
}
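// Illustrative example (assumed, not from the original source), C++:
//
//   std::string s = "hi";
//   std::printf("%s\n", s);          // diagnosed: cannot pass an object of
//                                    // non-trivial type through '...'; the
//                                    // call will abort at runtime (see the
//                                    // trap rewrite below)
//   std::printf("%s\n", s.c_str());  // OK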
ExprResult Sema::DefaultVariadicArgumentPromotion(Expr *E, VariadicCallType CT,
FunctionDecl *FDecl) {
if (const BuiltinType *PlaceholderTy = E->getType()->getAsPlaceholderType()) {
// Strip the unbridged-cast placeholder expression off, if applicable.
if (PlaceholderTy->getKind() == BuiltinType::ARCUnbridgedCast &&
(CT == VariadicMethod ||
(FDecl && FDecl->hasAttr<CFAuditedTransferAttr>()))) {
E = ObjC().stripARCUnbridgedCast(E);
// Otherwise, do normal placeholder checking.
} else {
ExprResult ExprRes = CheckPlaceholderExpr(E);
if (ExprRes.isInvalid())
return ExprError();
E = ExprRes.get();
}
}
ExprResult ExprRes = DefaultArgumentPromotion(E);
if (ExprRes.isInvalid())
return ExprError();
// Copy blocks to the heap.
if (ExprRes.get()->getType()->isBlockPointerType())
maybeExtendBlockObject(ExprRes);
E = ExprRes.get();
// Diagnostics regarding non-POD argument types are
// emitted along with format string checking in Sema::CheckFunctionCall().
if (isValidVarArgType(E->getType()) == VAK_Undefined) {
// Turn this into a trap.
CXXScopeSpec SS;
SourceLocation TemplateKWLoc;
UnqualifiedId Name;
Name.setIdentifier(PP.getIdentifierInfo("__builtin_trap"),
E->getBeginLoc());
ExprResult TrapFn = ActOnIdExpression(TUScope, SS, TemplateKWLoc, Name,
/*HasTrailingLParen=*/true,
/*IsAddressOfOperand=*/false);
if (TrapFn.isInvalid())
return ExprError();
ExprResult Call = BuildCallExpr(TUScope, TrapFn.get(), E->getBeginLoc(),
std::nullopt, E->getEndLoc());
if (Call.isInvalid())
return ExprError();
ExprResult Comma =
ActOnBinOp(TUScope, E->getBeginLoc(), tok::comma, Call.get(), E);
if (Comma.isInvalid())
return ExprError();
return Comma.get();
}
if (!getLangOpts().CPlusPlus &&
RequireCompleteType(E->getExprLoc(), E->getType(),
diag::err_call_incomplete_argument))
return ExprError();
return E;
}
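// For the VAK_Undefined case above, the argument is in effect rewritten
// (illustrative sketch) from
//
//   printf("%s\n", s);
// to
//   printf("%s\n", (__builtin_trap(), s));
//
// so the ill-formed call traps at runtime instead of reading garbage out of
// the variadic argument area.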
/// Convert complex integers to complex floats and real integers to
/// real floats as required for complex arithmetic. Helper function of
/// UsualArithmeticConversions()
///
/// \return false if the integer expression has (complex) integer type and
/// was converted to the (complex) float type (or the cast was skipped);
/// true if it already has a (complex) floating type.
static bool handleComplexIntegerToFloatConversion(Sema &S, ExprResult &IntExpr,
ExprResult &ComplexExpr,
QualType IntTy,
QualType ComplexTy,
bool SkipCast) {
if (IntTy->isComplexType() || IntTy->isRealFloatingType()) return true;
if (SkipCast) return false;
if (IntTy->isIntegerType()) {
QualType fpTy = ComplexTy->castAs<ComplexType>()->getElementType();
IntExpr = S.ImpCastExprToType(IntExpr.get(), fpTy, CK_IntegralToFloating);
} else {
assert(IntTy->isComplexIntegerType());
IntExpr = S.ImpCastExprToType(IntExpr.get(), ComplexTy,
CK_IntegralComplexToFloatingComplex);
}
return false;
}
// This handles complex/complex, complex/float, or float/complex.
// When both operands are complex, the shorter operand is converted to the
// type of the longer, and that is the type of the result. This corresponds
// to what is done when combining two real floating-point operands.
// The fun begins when size promotion occurs across type domains.
// From H&S 6.3.4: When one operand is complex and the other is a real
// floating-point type, the less precise type is converted, within its
// real or complex domain, to the precision of the other type. For example,
// when combining a "long double" with a "double _Complex", the
// "double _Complex" is promoted to "long double _Complex".
static QualType handleComplexFloatConversion(Sema &S, ExprResult &Shorter,
QualType ShorterType,
QualType LongerType,
bool PromotePrecision) {
bool LongerIsComplex = isa<ComplexType>(LongerType.getCanonicalType());
QualType Result =
LongerIsComplex ? LongerType : S.Context.getComplexType(LongerType);
if (PromotePrecision) {
if (isa<ComplexType>(ShorterType.getCanonicalType())) {
Shorter =
S.ImpCastExprToType(Shorter.get(), Result, CK_FloatingComplexCast);
} else {
if (LongerIsComplex)
LongerType = LongerType->castAs<ComplexType>()->getElementType();
Shorter = S.ImpCastExprToType(Shorter.get(), LongerType, CK_FloatingCast);
}
}
return Result;
}
/// Handle arithmetic conversion with complex types. Helper function of
/// UsualArithmeticConversions()
static QualType handleComplexConversion(Sema &S, ExprResult &LHS,
ExprResult &RHS, QualType LHSType,
QualType RHSType, bool IsCompAssign) {
// Handle (complex) integer types.
if (!handleComplexIntegerToFloatConversion(S, RHS, LHS, RHSType, LHSType,
/*SkipCast=*/false))
return LHSType;
if (!handleComplexIntegerToFloatConversion(S, LHS, RHS, LHSType, RHSType,
/*SkipCast=*/IsCompAssign))
return RHSType;
// Compute the rank of the two types, regardless of whether they are complex.
int Order = S.Context.getFloatingTypeOrder(LHSType, RHSType);
if (Order < 0)
// Promote the precision of the LHS if not an assignment.
return handleComplexFloatConversion(S, LHS, LHSType, RHSType,
/*PromotePrecision=*/!IsCompAssign);
// Promote the precision of the RHS unless it is already the same as the LHS.
return handleComplexFloatConversion(S, RHS, RHSType, LHSType,
/*PromotePrecision=*/Order > 0);
}
/// Handle arithmetic conversion from integer to float. Helper function
/// of UsualArithmeticConversions()
static QualType handleIntToFloatConversion(Sema &S, ExprResult &FloatExpr,
ExprResult &IntExpr,
QualType FloatTy, QualType IntTy,
bool ConvertFloat, bool ConvertInt) {
if (IntTy->isIntegerType()) {
if (ConvertInt)
// Convert intExpr to the lhs floating point type.
IntExpr = S.ImpCastExprToType(IntExpr.get(), FloatTy,
CK_IntegralToFloating);
return FloatTy;
}
// Convert both sides to the appropriate complex float.
assert(IntTy->isComplexIntegerType());
QualType result = S.Context.getComplexType(FloatTy);
// _Complex int -> _Complex float
if (ConvertInt)
IntExpr = S.ImpCastExprToType(IntExpr.get(), result,
CK_IntegralComplexToFloatingComplex);
// float -> _Complex float
if (ConvertFloat)
FloatExpr = S.ImpCastExprToType(FloatExpr.get(), result,
CK_FloatingRealToComplex);
return result;
}
/// Handle arithmetic conversion with floating point types. Helper
/// function of UsualArithmeticConversions()
static QualType handleFloatConversion(Sema &S, ExprResult &LHS,
ExprResult &RHS, QualType LHSType,
QualType RHSType, bool IsCompAssign) {
bool LHSFloat = LHSType->isRealFloatingType();
bool RHSFloat = RHSType->isRealFloatingType();
// N1169 4.1.4: If one of the operands has a floating type and the other
// operand has a fixed-point type, the fixed-point operand
// is converted to the floating type [...]
if (LHSType->isFixedPointType() || RHSType->isFixedPointType()) {
if (LHSFloat)
RHS = S.ImpCastExprToType(RHS.get(), LHSType, CK_FixedPointToFloating);
else if (!IsCompAssign)
LHS = S.ImpCastExprToType(LHS.get(), RHSType, CK_FixedPointToFloating);
return LHSFloat ? LHSType : RHSType;
}
// If we have two real floating types, convert the smaller operand
// to the bigger result.
if (LHSFloat && RHSFloat) {
int order = S.Context.getFloatingTypeOrder(LHSType, RHSType);
if (order > 0) {
RHS = S.ImpCastExprToType(RHS.get(), LHSType, CK_FloatingCast);
return LHSType;
}
assert(order < 0 && "illegal float comparison");
if (!IsCompAssign)
LHS = S.ImpCastExprToType(LHS.get(), RHSType, CK_FloatingCast);
return RHSType;
}
if (LHSFloat) {
// Half FP has to be promoted to float unless it is natively supported
if (LHSType->isHalfType() && !S.getLangOpts().NativeHalfType)
LHSType = S.Context.FloatTy;
return handleIntToFloatConversion(S, LHS, RHS, LHSType, RHSType,
/*ConvertFloat=*/!IsCompAssign,
/*ConvertInt=*/ true);
}
assert(RHSFloat);
return handleIntToFloatConversion(S, RHS, LHS, RHSType, LHSType,
/*ConvertFloat=*/ true,
/*ConvertInt=*/!IsCompAssign);
}
/// Diagnose attempts to convert between __float128, __ibm128 and
/// long double if there is no support for such conversion.
/// Helper function of UsualArithmeticConversions().
static bool unsupportedTypeConversion(const Sema &S, QualType LHSType,
QualType RHSType) {
// No issue if either is not a floating point type.
if (!LHSType->isFloatingType() || !RHSType->isFloatingType())
return false;
// No issue if both have the same 128-bit float semantics.
auto *LHSComplex = LHSType->getAs<ComplexType>();
auto *RHSComplex = RHSType->getAs<ComplexType>();
QualType LHSElem = LHSComplex ? LHSComplex->getElementType() : LHSType;
QualType RHSElem = RHSComplex ? RHSComplex->getElementType() : RHSType;
const llvm::fltSemantics &LHSSem = S.Context.getFloatTypeSemantics(LHSElem);
const llvm::fltSemantics &RHSSem = S.Context.getFloatTypeSemantics(RHSElem);
if ((&LHSSem != &llvm::APFloat::PPCDoubleDouble() ||
&RHSSem != &llvm::APFloat::IEEEquad()) &&
(&LHSSem != &llvm::APFloat::IEEEquad() ||
&RHSSem != &llvm::APFloat::PPCDoubleDouble()))
return false;
return true;
}
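// Illustrative example (assumed, not from the original source): on PowerPC,
// where 'long double' may use the IBM double-double (PPCDoubleDouble) format:
//
//   long double ld;   // PPCDoubleDouble semantics
//   __float128 q;     // IEEEquad semantics
//   ld + q;           // rejected: no usual arithmetic conversion between the
//                     // two 128-bit formats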
typedef ExprResult PerformCastFn(Sema &S, Expr *operand, QualType toType);
namespace {
/// These helper callbacks are placed in an anonymous namespace to
/// permit their use as function template parameters.
ExprResult doIntegralCast(Sema &S, Expr *op, QualType toType) {
return S.ImpCastExprToType(op, toType, CK_IntegralCast);
}
ExprResult doComplexIntegralCast(Sema &S, Expr *op, QualType toType) {
return S.ImpCastExprToType(op, S.Context.getComplexType(toType),
CK_IntegralComplexCast);
}
}
/// Handle integer arithmetic conversions. Helper function of
/// UsualArithmeticConversions()
template <PerformCastFn doLHSCast, PerformCastFn doRHSCast>
static QualType handleIntegerConversion(Sema &S, ExprResult &LHS,
ExprResult &RHS, QualType LHSType,
QualType RHSType, bool IsCompAssign) {
// The rules for this case are in C99 6.3.1.8
int order = S.Context.getIntegerTypeOrder(LHSType, RHSType);
bool LHSSigned = LHSType->hasSignedIntegerRepresentation();
bool RHSSigned = RHSType->hasSignedIntegerRepresentation();
if (LHSSigned == RHSSigned) {
// Same signedness; use the higher-ranked type
if (order >= 0) {
RHS = (*doRHSCast)(S, RHS.get(), LHSType);
return LHSType;
} else if (!IsCompAssign)
LHS = (*doLHSCast)(S, LHS.get(), RHSType);
return RHSType;
} else if (order != (LHSSigned ? 1 : -1)) {
// The unsigned type has greater than or equal rank to the
// signed type, so use the unsigned type
if (RHSSigned) {
RHS = (*doRHSCast)(S, RHS.get(), LHSType);
return LHSType;
} else if (!IsCompAssign)
LHS = (*doLHSCast)(S, LHS.get(), RHSType);
return RHSType;
} else if (S.Context.getIntWidth(LHSType) != S.Context.getIntWidth(RHSType)) {
// The two types are different widths; if we are here, that
// means the signed type is larger than the unsigned type, so
// use the signed type.
if (LHSSigned) {
RHS = (*doRHSCast)(S, RHS.get(), LHSType);
return LHSType;
} else if (!IsCompAssign)
LHS = (*doLHSCast)(S, LHS.get(), RHSType);
return RHSType;
} else {
// The signed type is higher-ranked than the unsigned type,
// but isn't actually any bigger (like unsigned int and long
// on most 32-bit systems). Use the unsigned type corresponding
// to the signed type.
QualType result =
S.Context.getCorrespondingUnsignedType(LHSSigned ? LHSType : RHSType);
RHS = (*doRHSCast)(S, RHS.get(), result);
if (!IsCompAssign)
LHS = (*doLHSCast)(S, LHS.get(), result);
return result;
}
}
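// Illustrative example (assumed, not from the original source), on a typical
// ILP32 target where 'int' and 'long' are both 32 bits wide:
//
//   unsigned int u; long l;
//   u + l;   // 'long' outranks 'unsigned int' but is no wider, so both
//            // operands convert to 'unsigned long' (the final case above)
//
//   int i; unsigned int u2;
//   i + u2;  // 'unsigned int' has rank >= 'int': both convert to
//            // 'unsigned int'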
/// Handle conversions with GCC complex int extension. Helper function
/// of UsualArithmeticConversions()
static QualType handleComplexIntConversion(Sema &S, ExprResult &LHS,
ExprResult &RHS, QualType LHSType,
QualType RHSType,
bool IsCompAssign) {
const ComplexType *LHSComplexInt = LHSType->getAsComplexIntegerType();
const ComplexType *RHSComplexInt = RHSType->getAsComplexIntegerType();
if (LHSComplexInt && RHSComplexInt) {
QualType LHSEltType = LHSComplexInt->getElementType();
QualType RHSEltType = RHSComplexInt->getElementType();
QualType ScalarType =
handleIntegerConversion<doComplexIntegralCast, doComplexIntegralCast>
(S, LHS, RHS, LHSEltType, RHSEltType, IsCompAssign);
return S.Context.getComplexType(ScalarType);
}
if (LHSComplexInt) {
QualType LHSEltType = LHSComplexInt->getElementType();
QualType ScalarType =
handleIntegerConversion<doComplexIntegralCast, doIntegralCast>
(S, LHS, RHS, LHSEltType, RHSType, IsCompAssign);
QualType ComplexType = S.Context.getComplexType(ScalarType);
RHS = S.ImpCastExprToType(RHS.get(), ComplexType,
CK_IntegralRealToComplex);
return ComplexType;
}
assert(RHSComplexInt);
QualType RHSEltType = RHSComplexInt->getElementType();
QualType ScalarType =
handleIntegerConversion<doIntegralCast, doComplexIntegralCast>
(S, LHS, RHS, LHSType, RHSEltType, IsCompAssign);
QualType ComplexType = S.Context.getComplexType(ScalarType);
if (!IsCompAssign)
LHS = S.ImpCastExprToType(LHS.get(), ComplexType,
CK_IntegralRealToComplex);
return ComplexType;
}
/// Return the rank of a given fixed point or integer type. The value itself
/// doesn't matter, but the values must increase monotonically with the
/// conversion rank described in N1169 4.1.1.
static unsigned GetFixedPointRank(QualType Ty) {
const auto *BTy = Ty->getAs<BuiltinType>();
assert(BTy && "Expected a builtin type.");
switch (BTy->getKind()) {
case BuiltinType::ShortFract:
case BuiltinType::UShortFract:
case BuiltinType::SatShortFract:
case BuiltinType::SatUShortFract:
return 1;
case BuiltinType::Fract:
case BuiltinType::UFract:
case BuiltinType::SatFract:
case BuiltinType::SatUFract:
return 2;
case BuiltinType::LongFract:
case BuiltinType::ULongFract:
case BuiltinType::SatLongFract:
case BuiltinType::SatULongFract:
return 3;
case BuiltinType::ShortAccum:
case BuiltinType::UShortAccum:
case BuiltinType::SatShortAccum:
case BuiltinType::SatUShortAccum:
return 4;
case BuiltinType::Accum:
case BuiltinType::UAccum:
case BuiltinType::SatAccum:
case BuiltinType::SatUAccum:
return 5;
case BuiltinType::LongAccum:
case BuiltinType::ULongAccum:
case BuiltinType::SatLongAccum:
case BuiltinType::SatULongAccum:
return 6;
default:
if (BTy->isInteger())
return 0;
llvm_unreachable("Unexpected fixed point or integer type");
}
}
/// handleFixedPointConversion - Fixed point operations between fixed
/// point types and integers or other fixed point types do not fall under
/// usual arithmetic conversion since these conversions could result in loss
/// of precision (N1169 4.1.4). These operations should be calculated with
/// the full precision of their result type (N1169 4.1.6.2.1).
static QualType handleFixedPointConversion(Sema &S, QualType LHSTy,
QualType RHSTy) {
assert((LHSTy->isFixedPointType() || RHSTy->isFixedPointType()) &&
"Expected at least one of the operands to be a fixed point type");
assert((LHSTy->isFixedPointOrIntegerType() ||
RHSTy->isFixedPointOrIntegerType()) &&
"Special fixed point arithmetic operation conversions are only "
"applied to ints or other fixed point types");
// If one operand has signed fixed-point type and the other operand has
// unsigned fixed-point type, then the unsigned fixed-point operand is
// converted to its corresponding signed fixed-point type and the resulting
// type is the type of the converted operand.
if (RHSTy->isSignedFixedPointType() && LHSTy->isUnsignedFixedPointType())
LHSTy = S.Context.getCorrespondingSignedFixedPointType(LHSTy);
else if (RHSTy->isUnsignedFixedPointType() && LHSTy->isSignedFixedPointType())
RHSTy = S.Context.getCorrespondingSignedFixedPointType(RHSTy);
// The result type is the type with the highest rank, whereby a fixed-point
// conversion rank is always greater than an integer conversion rank; if the
// type of either of the operands is a saturating fixed-point type, the result
// type shall be the saturating fixed-point type corresponding to the type
// with the highest rank; the resulting value is converted (taking into
// account rounding and overflow) to the precision of the resulting type.
// Same ranks between signed and unsigned types are resolved earlier, so both
// types are either signed or both unsigned at this point.
unsigned LHSTyRank = GetFixedPointRank(LHSTy);
unsigned RHSTyRank = GetFixedPointRank(RHSTy);
QualType ResultTy = LHSTyRank > RHSTyRank ? LHSTy : RHSTy;
if (LHSTy->isSaturatedFixedPointType() || RHSTy->isSaturatedFixedPointType())
ResultTy = S.Context.getCorrespondingSaturatedType(ResultTy);
return ResultTy;
}
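// Illustrative example (assumed, not from the original source), with
// -ffixed-point:
//
//   _Accum a; unsigned _Fract uf;
//   a + uf;           // 'unsigned _Fract' first converts to '_Fract';
//                     // '_Accum' has the higher rank, so the result type
//                     // is '_Accum'
//
//   _Sat _Fract sf; long _Accum la;
//   sf + la;          // result is '_Sat long _Accum': saturation carries
//                     // over to the higher-ranked type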
/// Check that the usual arithmetic conversions can be performed on this pair of
/// expressions that might be of enumeration type.
static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS,
SourceLocation Loc,
Sema::ArithConvKind ACK) {
// C++2a [expr.arith.conv]p1:
// If one operand is of enumeration type and the other operand is of a
// different enumeration type or a floating-point type, this behavior is
// deprecated ([depr.arith.conv.enum]).
//
// Warn on this in all language modes. Produce a deprecation warning in C++20.
// Eventually we will presumably reject these cases (in C++23 onwards?).
QualType L = LHS->getEnumCoercedType(S.Context),
R = RHS->getEnumCoercedType(S.Context);
bool LEnum = L->isUnscopedEnumerationType(),
REnum = R->isUnscopedEnumerationType();
bool IsCompAssign = ACK == Sema::ACK_CompAssign;
if ((!IsCompAssign && LEnum && R->isFloatingType()) ||
(REnum && L->isFloatingType())) {
S.Diag(Loc, S.getLangOpts().CPlusPlus26
? diag::err_arith_conv_enum_float_cxx26
: S.getLangOpts().CPlusPlus20
? diag::warn_arith_conv_enum_float_cxx20
: diag::warn_arith_conv_enum_float)
<< LHS->getSourceRange() << RHS->getSourceRange() << (int)ACK << LEnum
<< L << R;
} else if (!IsCompAssign && LEnum && REnum &&
!S.Context.hasSameUnqualifiedType(L, R)) {
unsigned DiagID;
// In C++ 26, usual arithmetic conversions between 2 different enum types
// are ill-formed.
if (S.getLangOpts().CPlusPlus26)
DiagID = diag::err_conv_mixed_enum_types_cxx26;
else if (!L->castAs<EnumType>()->getDecl()->hasNameForLinkage() ||
!R->castAs<EnumType>()->getDecl()->hasNameForLinkage()) {
// If either enumeration type is unnamed, it's less likely that the
// user cares about this, but this situation is still deprecated in
// C++2a. Use a different warning group.
DiagID = S.getLangOpts().CPlusPlus20
? diag::warn_arith_conv_mixed_anon_enum_types_cxx20
: diag::warn_arith_conv_mixed_anon_enum_types;
} else if (ACK == Sema::ACK_Conditional) {
// Conditional expressions are separated out because they have
// historically had a different warning flag.
DiagID = S.getLangOpts().CPlusPlus20
? diag::warn_conditional_mixed_enum_types_cxx20
: diag::warn_conditional_mixed_enum_types;
} else if (ACK == Sema::ACK_Comparison) {
// Comparison expressions are separated out because they have
// historically had a different warning flag.
DiagID = S.getLangOpts().CPlusPlus20
? diag::warn_comparison_mixed_enum_types_cxx20
: diag::warn_comparison_mixed_enum_types;
} else {
DiagID = S.getLangOpts().CPlusPlus20
? diag::warn_arith_conv_mixed_enum_types_cxx20
: diag::warn_arith_conv_mixed_enum_types;
}
S.Diag(Loc, DiagID) << LHS->getSourceRange() << RHS->getSourceRange()
<< (int)ACK << L << R;
}
}
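// Illustrative example (assumed, not from the original source), C++:
//
//   enum E1 { a }; enum E2 { b };
//   int x = a + b;       // deprecated in C++20, ill-formed in C++26
//   double d = a + 1.0;  // enum/floating-point arithmetic gets the same
//                        // treatment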
/// UsualArithmeticConversions - Performs various conversions that are common to
/// binary operators (C99 6.3.1.8). If both operands aren't arithmetic, this
/// routine returns the first non-arithmetic type found. The client is
/// responsible for emitting appropriate error diagnostics.
QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc,
ArithConvKind ACK) {
checkEnumArithmeticConversions(*this, LHS.get(), RHS.get(), Loc, ACK);
if (ACK != ACK_CompAssign) {
LHS = UsualUnaryConversions(LHS.get());
if (LHS.isInvalid())
return QualType();
}
RHS = UsualUnaryConversions(RHS.get());
if (RHS.isInvalid())
return QualType();
// For conversion purposes, we ignore any qualifiers.
// For example, "const float" and "float" are equivalent.
QualType LHSType = LHS.get()->getType().getUnqualifiedType();
QualType RHSType = RHS.get()->getType().getUnqualifiedType();
// For conversion purposes, we ignore any atomic qualifier on the LHS.
if (const AtomicType *AtomicLHS = LHSType->getAs<AtomicType>())
LHSType = AtomicLHS->getValueType();
// If both types are identical, no conversion is needed.
if (Context.hasSameType(LHSType, RHSType))
return Context.getCommonSugaredType(LHSType, RHSType);
// If either side is a non-arithmetic type (e.g. a pointer), we are done.
// The caller can deal with this (e.g. pointer + int).
if (!LHSType->isArithmeticType() || !RHSType->isArithmeticType())
return QualType();
// Apply unary and bitfield promotions to the LHS's type.
QualType LHSUnpromotedType = LHSType;
if (Context.isPromotableIntegerType(LHSType))
LHSType = Context.getPromotedIntegerType(LHSType);
QualType LHSBitfieldPromoteTy = Context.isPromotableBitField(LHS.get());
if (!LHSBitfieldPromoteTy.isNull())
LHSType = LHSBitfieldPromoteTy;
if (LHSType != LHSUnpromotedType && ACK != ACK_CompAssign)
LHS = ImpCastExprToType(LHS.get(), LHSType, CK_IntegralCast);
// If both types are identical, no conversion is needed.
if (Context.hasSameType(LHSType, RHSType))
return Context.getCommonSugaredType(LHSType, RHSType);
// At this point, we have two different arithmetic types.
// Diagnose attempts to convert between __ibm128, __float128 and long double
// where such conversions currently can't be handled.
if (unsupportedTypeConversion(*this, LHSType, RHSType))
return QualType();
// Handle complex types first (C99 6.3.1.8p1).
if (LHSType->isComplexType() || RHSType->isComplexType())
return handleComplexConversion(*this, LHS, RHS, LHSType, RHSType,
ACK == ACK_CompAssign);
// Now handle "real" floating types (i.e. float, double, long double).
if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType())
return handleFloatConversion(*this, LHS, RHS, LHSType, RHSType,
ACK == ACK_CompAssign);
// Handle GCC complex int extension.
if (LHSType->isComplexIntegerType() || RHSType->isComplexIntegerType())
return handleComplexIntConversion(*this, LHS, RHS, LHSType, RHSType,
ACK == ACK_CompAssign);
if (LHSType->isFixedPointType() || RHSType->isFixedPointType())
return handleFixedPointConversion(*this, LHSType, RHSType);
// Finally, we have two differing integer types.
return handleIntegerConversion<doIntegralCast, doIntegralCast>
(*this, LHS, RHS, LHSType, RHSType, ACK == ACK_CompAssign);
}
//===----------------------------------------------------------------------===//
// Semantic Analysis for various Expression Types
//===----------------------------------------------------------------------===//
ExprResult Sema::ActOnGenericSelectionExpr(
SourceLocation KeyLoc, SourceLocation DefaultLoc, SourceLocation RParenLoc,
bool PredicateIsExpr, void *ControllingExprOrType,
ArrayRef<ParsedType> ArgTypes, ArrayRef<Expr *> ArgExprs) {
unsigned NumAssocs = ArgTypes.size();
assert(NumAssocs == ArgExprs.size());
TypeSourceInfo **Types = new TypeSourceInfo*[NumAssocs];
for (unsigned i = 0; i < NumAssocs; ++i) {
if (ArgTypes[i])
(void) GetTypeFromParser(ArgTypes[i], &Types[i]);
else
Types[i] = nullptr;
}
// If we have a controlling type, we need to convert it from a parsed type
// into a semantic type and then pass that along.
if (!PredicateIsExpr) {
TypeSourceInfo *ControllingType;
(void)GetTypeFromParser(ParsedType::getFromOpaquePtr(ControllingExprOrType),
&ControllingType);
assert(ControllingType && "couldn't get the type out of the parser");
ControllingExprOrType = ControllingType;
}
ExprResult ER = CreateGenericSelectionExpr(
KeyLoc, DefaultLoc, RParenLoc, PredicateIsExpr, ControllingExprOrType,
llvm::ArrayRef(Types, NumAssocs), ArgExprs);
delete [] Types;
return ER;
}
ExprResult Sema::CreateGenericSelectionExpr(
SourceLocation KeyLoc, SourceLocation DefaultLoc, SourceLocation RParenLoc,
bool PredicateIsExpr, void *ControllingExprOrType,
ArrayRef<TypeSourceInfo *> Types, ArrayRef<Expr *> Exprs) {
unsigned NumAssocs = Types.size();
assert(NumAssocs == Exprs.size());
assert(ControllingExprOrType &&
"Must have either a controlling expression or a controlling type");
Expr *ControllingExpr = nullptr;
TypeSourceInfo *ControllingType = nullptr;
if (PredicateIsExpr) {
// Decay and strip qualifiers for the controlling expression type, and
// handle placeholder type replacement. See committee discussion from WG14
// DR423.
EnterExpressionEvaluationContext Unevaluated(
*this, Sema::ExpressionEvaluationContext::Unevaluated);
ExprResult R = DefaultFunctionArrayLvalueConversion(
reinterpret_cast<Expr *>(ControllingExprOrType));
if (R.isInvalid())
return ExprError();
ControllingExpr = R.get();
} else {
// The extension form uses the type directly rather than converting it.
ControllingType = reinterpret_cast<TypeSourceInfo *>(ControllingExprOrType);
if (!ControllingType)
return ExprError();
}
bool TypeErrorFound = false,
IsResultDependent = ControllingExpr
? ControllingExpr->isTypeDependent()
: ControllingType->getType()->isDependentType(),
ContainsUnexpandedParameterPack =
ControllingExpr
? ControllingExpr->containsUnexpandedParameterPack()
: ControllingType->getType()->containsUnexpandedParameterPack();
// The controlling expression is an unevaluated operand, so side effects are
// likely unintended.
if (!inTemplateInstantiation() && !IsResultDependent && ControllingExpr &&
ControllingExpr->HasSideEffects(Context, false))
Diag(ControllingExpr->getExprLoc(),
diag::warn_side_effects_unevaluated_context);
for (unsigned i = 0; i < NumAssocs; ++i) {
if (Exprs[i]->containsUnexpandedParameterPack())
ContainsUnexpandedParameterPack = true;
if (Types[i]) {
if (Types[i]->getType()->containsUnexpandedParameterPack())
ContainsUnexpandedParameterPack = true;
if (Types[i]->getType()->isDependentType()) {
IsResultDependent = true;
} else {
// We relax the restriction on use of incomplete types and non-object
// types with the type-based extension of _Generic. Allowing incomplete
// objects means those can be used as "tags" for a type-safe way to map
// to a value. Similarly, matching on function types rather than
// function pointer types can be useful. However, the restriction on VM
// types makes sense to retain as there are open questions about how
// the selection can be made at compile time.
//
// C11 6.5.1.1p2 "The type name in a generic association shall specify a
// complete object type other than a variably modified type."
unsigned D = 0;
if (ControllingExpr && Types[i]->getType()->isIncompleteType())
D = diag::err_assoc_type_incomplete;
else if (ControllingExpr && !Types[i]->getType()->isObjectType())
D = diag::err_assoc_type_nonobject;
else if (Types[i]->getType()->isVariablyModifiedType())
D = diag::err_assoc_type_variably_modified;
else if (ControllingExpr) {
// Because the controlling expression undergoes lvalue conversion,
// array conversion, and function conversion, an association which is
// of array type, function type, or is qualified can never be
// reached. We will warn about this so users are less surprised by
// the unreachable association. However, we don't have to handle
// function types; that's not an object type, so it's handled above.
//
// The logic is somewhat different for C++ because C++ has different
// lvalue to rvalue conversion rules than C. [conv.lvalue]p1 says,
// If T is a non-class type, the type of the prvalue is the cv-
// unqualified version of T. Otherwise, the type of the prvalue is T.
// The result of these rules is that all qualified types in an
// association in C are unreachable, and in C++, only qualified non-
// class types are unreachable.
//
// NB: this does not apply when the first operand is a type rather
// than an expression, because the type form does not undergo
// conversion.
unsigned Reason = 0;
QualType QT = Types[i]->getType();
if (QT->isArrayType())
Reason = 1;
else if (QT.hasQualifiers() &&
(!LangOpts.CPlusPlus || !QT->isRecordType()))
Reason = 2;
if (Reason)
Diag(Types[i]->getTypeLoc().getBeginLoc(),
diag::warn_unreachable_association)
<< QT << (Reason - 1);
}
if (D != 0) {
Diag(Types[i]->getTypeLoc().getBeginLoc(), D)
<< Types[i]->getTypeLoc().getSourceRange()
<< Types[i]->getType();
TypeErrorFound = true;
}
// C11 6.5.1.1p2 "No two generic associations in the same generic
// selection shall specify compatible types."
for (unsigned j = i+1; j < NumAssocs; ++j)
if (Types[j] && !Types[j]->getType()->isDependentType() &&
Context.typesAreCompatible(Types[i]->getType(),
Types[j]->getType())) {
Diag(Types[j]->getTypeLoc().getBeginLoc(),
diag::err_assoc_compatible_types)
<< Types[j]->getTypeLoc().getSourceRange()
<< Types[j]->getType()
<< Types[i]->getType();
Diag(Types[i]->getTypeLoc().getBeginLoc(),
diag::note_compat_assoc)
<< Types[i]->getTypeLoc().getSourceRange()
<< Types[i]->getType();
TypeErrorFound = true;
}
}
}
}
if (TypeErrorFound)
return ExprError();
// If we determined that the generic selection is result-dependent, don't
// try to compute the result expression.
if (IsResultDependent) {
if (ControllingExpr)
return GenericSelectionExpr::Create(Context, KeyLoc, ControllingExpr,
Types, Exprs, DefaultLoc, RParenLoc,
ContainsUnexpandedParameterPack);
return GenericSelectionExpr::Create(Context, KeyLoc, ControllingType, Types,
Exprs, DefaultLoc, RParenLoc,
ContainsUnexpandedParameterPack);
}
SmallVector<unsigned, 1> CompatIndices;
unsigned DefaultIndex = -1U;
// Look at the canonical type of the controlling expression in case it was a
// deduced type like __auto_type. However, when issuing diagnostics, use the
// type the user wrote in source rather than the canonical one.
for (unsigned i = 0; i < NumAssocs; ++i) {
if (!Types[i])
DefaultIndex = i;
else if (ControllingExpr &&
Context.typesAreCompatible(
ControllingExpr->getType().getCanonicalType(),
Types[i]->getType()))
CompatIndices.push_back(i);
else if (ControllingType &&
Context.typesAreCompatible(
ControllingType->getType().getCanonicalType(),
Types[i]->getType()))
CompatIndices.push_back(i);
}
auto GetControllingRangeAndType = [](Expr *ControllingExpr,
TypeSourceInfo *ControllingType) {
// We strip parens here because the controlling expression is typically
// parenthesized in macro definitions.
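// For example (illustrative), a typical wrapper macro parenthesizes its
// argument:
//   #define TYPE_TAG(x) _Generic((x), int : 1, default : 0)
// Stripping the parens keeps diagnostics pointed at 'x' itself.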
if (ControllingExpr)
ControllingExpr = ControllingExpr->IgnoreParens();
SourceRange SR = ControllingExpr
? ControllingExpr->getSourceRange()
: ControllingType->getTypeLoc().getSourceRange();
QualType QT = ControllingExpr ? ControllingExpr->getType()
: ControllingType->getType();
return std::make_pair(SR, QT);
};
// C11 6.5.1.1p2 "The controlling expression of a generic selection shall have
// type compatible with at most one of the types named in its generic
// association list."
if (CompatIndices.size() > 1) {
auto P = GetControllingRangeAndType(ControllingExpr, ControllingType);
SourceRange SR = P.first;
Diag(SR.getBegin(), diag::err_generic_sel_multi_match)
<< SR << P.second << (unsigned)CompatIndices.size();
for (unsigned I : CompatIndices) {
Diag(Types[I]->getTypeLoc().getBeginLoc(),
diag::note_compat_assoc)
<< Types[I]->getTypeLoc().getSourceRange()
<< Types[I]->getType();
}
return ExprError();
}
// C11 6.5.1.1p2 "If a generic selection has no default generic association,
// its controlling expression shall have type compatible with exactly one of
// the types named in its generic association list."
if (DefaultIndex == -1U && CompatIndices.size() == 0) {
auto P = GetControllingRangeAndType(ControllingExpr, ControllingType);
SourceRange SR = P.first;
Diag(SR.getBegin(), diag::err_generic_sel_no_match) << SR << P.second;
return ExprError();
}
// C11 6.5.1.1p3 "If a generic selection has a generic association with a
// type name that is compatible with the type of the controlling expression,
// then the result expression of the generic selection is the expression
// in that generic association. Otherwise, the result expression of the
// generic selection is the expression in the default generic association."
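// For example (illustrative):
//   _Generic(1.0f, float : 1, double : 2, default : 3) // selects the 'float'
//                                                      // arm and yields 1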
unsigned ResultIndex =
CompatIndices.size() ? CompatIndices[0] : DefaultIndex;
if (ControllingExpr) {
return GenericSelectionExpr::Create(
Context, KeyLoc, ControllingExpr, Types, Exprs, DefaultLoc, RParenLoc,
ContainsUnexpandedParameterPack, ResultIndex);
}
return GenericSelectionExpr::Create(
Context, KeyLoc, ControllingType, Types, Exprs, DefaultLoc, RParenLoc,
ContainsUnexpandedParameterPack, ResultIndex);
}
static PredefinedIdentKind getPredefinedExprKind(tok::TokenKind Kind) {
switch (Kind) {
default:
llvm_unreachable("unexpected TokenKind");
case tok::kw___func__:
return PredefinedIdentKind::Func; // [C99 6.4.2.2]
case tok::kw___FUNCTION__:
return PredefinedIdentKind::Function;
case tok::kw___FUNCDNAME__:
return PredefinedIdentKind::FuncDName; // [MS]
case tok::kw___FUNCSIG__:
return PredefinedIdentKind::FuncSig; // [MS]
case tok::kw_L__FUNCTION__:
return PredefinedIdentKind::LFunction; // [MS]
case tok::kw_L__FUNCSIG__:
return PredefinedIdentKind::LFuncSig; // [MS]
case tok::kw___PRETTY_FUNCTION__:
return PredefinedIdentKind::PrettyFunction; // [GNU]
}
}
/// getPredefinedExprDecl - Returns the Decl of a given DeclContext that can
/// be used to determine the value of a PredefinedExpr. This can be a block,
/// lambda, captured statement, or function; otherwise it returns a nullptr.
static Decl *getPredefinedExprDecl(DeclContext *DC) {
while (DC && !isa<BlockDecl, CapturedDecl, FunctionDecl, ObjCMethodDecl>(DC))
DC = DC->getParent();
return cast_or_null<Decl>(DC);
}
/// getUDSuffixLoc - Create a SourceLocation for a ud-suffix, given the
/// location of the token and the offset of the ud-suffix within it.
static SourceLocation getUDSuffixLoc(Sema &S, SourceLocation TokLoc,
unsigned Offset) {
return Lexer::AdvanceToTokenCharacter(TokLoc, Offset, S.getSourceManager(),
S.getLangOpts());
}
/// BuildCookedLiteralOperatorCall - A user-defined literal was found. Look up
/// the corresponding cooked (non-raw) literal operator, and build a call to it.
static ExprResult BuildCookedLiteralOperatorCall(Sema &S, Scope *Scope,
IdentifierInfo *UDSuffix,
SourceLocation UDSuffixLoc,
ArrayRef<Expr*> Args,
SourceLocation LitEndLoc) {
assert(Args.size() <= 2 && "too many arguments for literal operator");
QualType ArgTy[2];
for (unsigned ArgIdx = 0; ArgIdx != Args.size(); ++ArgIdx) {
ArgTy[ArgIdx] = Args[ArgIdx]->getType();
if (ArgTy[ArgIdx]->isArrayType())
ArgTy[ArgIdx] = S.Context.getArrayDecayedType(ArgTy[ArgIdx]);
}
DeclarationName OpName =
S.Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix);
DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc);
OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc);
LookupResult R(S, OpName, UDSuffixLoc, Sema::LookupOrdinaryName);
if (S.LookupLiteralOperator(Scope, R, llvm::ArrayRef(ArgTy, Args.size()),
/*AllowRaw*/ false, /*AllowTemplate*/ false,
/*AllowStringTemplatePack*/ false,
/*DiagnoseMissing*/ true) == Sema::LOLR_Error)
return ExprError();
return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc);
}
ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks) {
// StringToks needs backing storage as it doesn't hold array elements itself
std::vector<Token> ExpandedToks;
if (getLangOpts().MicrosoftExt)
StringToks = ExpandedToks = ExpandFunctionLocalPredefinedMacros(StringToks);
StringLiteralParser Literal(StringToks, PP,
StringLiteralEvalMethod::Unevaluated);
if (Literal.hadError)
return ExprError();
SmallVector<SourceLocation, 4> StringTokLocs;
for (const Token &Tok : StringToks)
StringTokLocs.push_back(Tok.getLocation());
StringLiteral *Lit = StringLiteral::Create(
Context, Literal.GetString(), StringLiteralKind::Unevaluated, false, {},
&StringTokLocs[0], StringTokLocs.size());
if (!Literal.getUDSuffix().empty()) {
SourceLocation UDSuffixLoc =
getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()],
Literal.getUDSuffixOffset());
return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl));
}
return Lit;
}
std::vector<Token>
Sema::ExpandFunctionLocalPredefinedMacros(ArrayRef<Token> Toks) {
// MSVC treats some predefined identifiers (e.g. __FUNCTION__) as function
// local macros that expand to string literals that may be concatenated.
// These macros are expanded here (in Sema), because StringLiteralParser
// (in Lex) doesn't know the enclosing function (because it hasn't been
// parsed yet).
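//
// For example, under -fms-extensions (an illustrative sketch):
//   void f() { const char *S = "entering " __FUNCTION__; }
// expands __FUNCTION__ to "f" so the pieces concatenate into one literal.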
assert(getLangOpts().MicrosoftExt);
// Note: Although function local macros are defined only inside functions,
// we ensure a valid `CurrentDecl` even outside of a function. This allows
// expansion of macros into empty string literals without additional checks.
Decl *CurrentDecl = getPredefinedExprDecl(CurContext);
if (!CurrentDecl)
CurrentDecl = Context.getTranslationUnitDecl();
std::vector<Token> ExpandedToks;
ExpandedToks.reserve(Toks.size());
for (const Token &Tok : Toks) {
if (!isFunctionLocalStringLiteralMacro(Tok.getKind(), getLangOpts())) {
assert(tok::isStringLiteral(Tok.getKind()));
ExpandedToks.emplace_back(Tok);
continue;
}
if (isa<TranslationUnitDecl>(CurrentDecl))
Diag(Tok.getLocation(), diag::ext_predef_outside_function);
// Stringify predefined expression
Diag(Tok.getLocation(), diag::ext_string_literal_from_predefined)
<< Tok.getKind();
SmallString<64> Str;
llvm::raw_svector_ostream OS(Str);
Token &Exp = ExpandedToks.emplace_back();
Exp.startToken();
if (Tok.getKind() == tok::kw_L__FUNCTION__ ||
Tok.getKind() == tok::kw_L__FUNCSIG__) {
OS << 'L';
Exp.setKind(tok::wide_string_literal);
} else {
Exp.setKind(tok::string_literal);
}
OS << '"'
<< Lexer::Stringify(PredefinedExpr::ComputeName(
getPredefinedExprKind(Tok.getKind()), CurrentDecl))
<< '"';
PP.CreateString(OS.str(), Exp, Tok.getLocation(), Tok.getEndLoc());
}
return ExpandedToks;
}
ExprResult
Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
assert(!StringToks.empty() && "Must have at least one string!");
// StringToks needs backing storage as it doesn't hold array elements itself
std::vector<Token> ExpandedToks;
if (getLangOpts().MicrosoftExt)
StringToks = ExpandedToks = ExpandFunctionLocalPredefinedMacros(StringToks);
StringLiteralParser Literal(StringToks, PP);
if (Literal.hadError)
return ExprError();
SmallVector<SourceLocation, 4> StringTokLocs;
for (const Token &Tok : StringToks)
StringTokLocs.push_back(Tok.getLocation());
QualType CharTy = Context.CharTy;
StringLiteralKind Kind = StringLiteralKind::Ordinary;
if (Literal.isWide()) {
CharTy = Context.getWideCharType();
Kind = StringLiteralKind::Wide;
} else if (Literal.isUTF8()) {
if (getLangOpts().Char8)
CharTy = Context.Char8Ty;
else if (getLangOpts().C23)
CharTy = Context.UnsignedCharTy;
Kind = StringLiteralKind::UTF8;
} else if (Literal.isUTF16()) {
CharTy = Context.Char16Ty;
Kind = StringLiteralKind::UTF16;
} else if (Literal.isUTF32()) {
CharTy = Context.Char32Ty;
Kind = StringLiteralKind::UTF32;
} else if (Literal.isPascal()) {
CharTy = Context.UnsignedCharTy;
}
// Warn on u8 string literals before C++20 and C23, where the type was an
// array of char before but becomes an array of char8_t (C++20) or
// unsigned char (C23).
// In C++20, it cannot be used where a pointer to char is expected.
// In C23, it might have an unexpected value if char was signed.
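//
// For example (illustrative):
//   const char *P = u8"text"; // OK before C++20; ill-formed in C++20, where
//                             // u8"text" has type 'const char8_t[5]'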
if (Kind == StringLiteralKind::UTF8 &&
(getLangOpts().CPlusPlus
? !getLangOpts().CPlusPlus20 && !getLangOpts().Char8
: !getLangOpts().C23)) {
Diag(StringTokLocs.front(), getLangOpts().CPlusPlus
? diag::warn_cxx20_compat_utf8_string
: diag::warn_c23_compat_utf8_string);
// Create removals for all 'u8' prefixes in the string literal(s). This
// ensures C++20/C23 compatibility (but may change the program behavior when
// built by non-Clang compilers for which the execution character set is
// not always UTF-8).
auto RemovalDiag = PDiag(diag::note_cxx20_c23_compat_utf8_string_remove_u8);
SourceLocation RemovalDiagLoc;
for (const Token &Tok : StringToks) {
if (Tok.getKind() == tok::utf8_string_literal) {
if (RemovalDiagLoc.isInvalid())
RemovalDiagLoc = Tok.getLocation();
RemovalDiag << FixItHint::CreateRemoval(CharSourceRange::getCharRange(
Tok.getLocation(),
Lexer::AdvanceToTokenCharacter(Tok.getLocation(), 2,
getSourceManager(), getLangOpts())));
}
}
Diag(RemovalDiagLoc, RemovalDiag);
}
QualType StrTy =
Context.getStringLiteralArrayType(CharTy, Literal.GetNumStringChars());
// Pass &StringTokLocs[0], StringTokLocs.size() to factory!
StringLiteral *Lit = StringLiteral::Create(Context, Literal.GetString(),
Kind, Literal.Pascal, StrTy,
&StringTokLocs[0],
StringTokLocs.size());
if (Literal.getUDSuffix().empty())
return Lit;
// We're building a user-defined literal.
IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix());
SourceLocation UDSuffixLoc =
getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()],
Literal.getUDSuffixOffset());
// Make sure we're allowed user-defined literals here.
if (!UDLScope)
return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl));
// C++11 [lex.ext]p5: The literal L is treated as a call of the form
// operator "" X (str, len)
QualType SizeType = Context.getSizeType();
DeclarationName OpName =
Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix);
DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc);
OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc);
QualType ArgTy[] = {
Context.getArrayDecayedType(StrTy), SizeType
};
LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName);
switch (LookupLiteralOperator(UDLScope, R, ArgTy,
/*AllowRaw*/ false, /*AllowTemplate*/ true,
/*AllowStringTemplatePack*/ true,
/*DiagnoseMissing*/ true, Lit)) {
case LOLR_Cooked: {
llvm::APInt Len(Context.getIntWidth(SizeType), Literal.GetNumStringChars());
IntegerLiteral *LenArg = IntegerLiteral::Create(Context, Len, SizeType,
StringTokLocs[0]);
Expr *Args[] = { Lit, LenArg };
return BuildLiteralOperatorCall(R, OpNameInfo, Args, StringTokLocs.back());
}
case LOLR_Template: {
TemplateArgumentListInfo ExplicitArgs;
TemplateArgument Arg(Lit);
TemplateArgumentLocInfo ArgInfo(Lit);
ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo));
return BuildLiteralOperatorCall(R, OpNameInfo, std::nullopt,
StringTokLocs.back(), &ExplicitArgs);
}
case LOLR_StringTemplatePack: {
TemplateArgumentListInfo ExplicitArgs;
unsigned CharBits = Context.getIntWidth(CharTy);
bool CharIsUnsigned = CharTy->isUnsignedIntegerType();
llvm::APSInt Value(CharBits, CharIsUnsigned);
TemplateArgument TypeArg(CharTy);
TemplateArgumentLocInfo TypeArgInfo(Context.getTrivialTypeSourceInfo(CharTy));
ExplicitArgs.addArgument(TemplateArgumentLoc(TypeArg, TypeArgInfo));
for (unsigned I = 0, N = Lit->getLength(); I != N; ++I) {
Value = Lit->getCodeUnit(I);
TemplateArgument Arg(Context, Value, CharTy);
TemplateArgumentLocInfo ArgInfo;
ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo));
}
return BuildLiteralOperatorCall(R, OpNameInfo, std::nullopt,
StringTokLocs.back(), &ExplicitArgs);
}
case LOLR_Raw:
case LOLR_ErrorNoDiagnostic:
llvm_unreachable("unexpected literal operator lookup result");
case LOLR_Error:
return ExprError();
}
llvm_unreachable("unexpected literal operator lookup result");
}
DeclRefExpr *
Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
SourceLocation Loc,
const CXXScopeSpec *SS) {
DeclarationNameInfo NameInfo(D->getDeclName(), Loc);
return BuildDeclRefExpr(D, Ty, VK, NameInfo, SS);
}
DeclRefExpr *
Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
const DeclarationNameInfo &NameInfo,
const CXXScopeSpec *SS, NamedDecl *FoundD,
SourceLocation TemplateKWLoc,
const TemplateArgumentListInfo *TemplateArgs) {
NestedNameSpecifierLoc NNS =
SS ? SS->getWithLocInContext(Context) : NestedNameSpecifierLoc();
return BuildDeclRefExpr(D, Ty, VK, NameInfo, NNS, FoundD, TemplateKWLoc,
TemplateArgs);
}
// CUDA/HIP: Check whether a captured reference variable is referencing a
// host variable in a device or host device lambda.
static bool isCapturingReferenceToHostVarInCUDADeviceLambda(const Sema &S,
VarDecl *VD) {
if (!S.getLangOpts().CUDA || !VD->hasInit())
return false;
assert(VD->getType()->isReferenceType());
// Check whether the reference variable is referencing a host variable.
auto *DRE = dyn_cast<DeclRefExpr>(VD->getInit());
if (!DRE)
return false;
auto *Referee = dyn_cast<VarDecl>(DRE->getDecl());
if (!Referee || !Referee->hasGlobalStorage() ||
Referee->hasAttr<CUDADeviceAttr>())
return false;
// Check whether the current function is a device or host device lambda.
// Check whether the reference variable is a capture by getDeclContext()
// since refersToEnclosingVariableOrCapture() is not ready at this point.
auto *MD = dyn_cast_or_null<CXXMethodDecl>(S.CurContext);
if (MD && MD->getParent()->isLambda() &&
MD->getOverloadedOperator() == OO_Call && MD->hasAttr<CUDADeviceAttr>() &&
VD->getDeclContext() != MD)
return true;
return false;
}
NonOdrUseReason Sema::getNonOdrUseReasonInCurrentContext(ValueDecl *D) {
// A declaration named in an unevaluated operand never constitutes an odr-use.
if (isUnevaluatedContext())
return NOUR_Unevaluated;
// C++2a [basic.def.odr]p4:
// A variable x whose name appears as a potentially-evaluated expression e
// is odr-used by e unless [...] x is a reference that is usable in
// constant expressions.
// CUDA/HIP:
// If a reference variable referencing a host variable is captured in a
// device or host device lambda, the value of the referee must be copied
// to the capture and the reference variable must be treated as odr-use
// since the value of the referee is not known at compile time and must
// be loaded from the capture.
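//
// For example (an illustrative sketch; CUDA extended-lambda syntax assumed):
//   int HostVar = 42;
//   constexpr int &Ref = HostVar;  // usable in constant expressions
//   auto K = [=] __device__ () { return Ref; }; // Ref must be odr-used so
//                                               // HostVar's value is copied
//                                               // into the capture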
if (VarDecl *VD = dyn_cast<VarDecl>(D)) {
if (VD->getType()->isReferenceType() &&
!(getLangOpts().OpenMP && OpenMP().isOpenMPCapturedDecl(D)) &&
!isCapturingReferenceToHostVarInCUDADeviceLambda(*this, VD) &&
VD->isUsableInConstantExpressions(Context))
return NOUR_Constant;
}
// All remaining non-variable cases constitute an odr-use. For variables, we
// need to wait and see how the expression is used.
return NOUR_None;
}
DeclRefExpr *
Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
const DeclarationNameInfo &NameInfo,
NestedNameSpecifierLoc NNS, NamedDecl *FoundD,
SourceLocation TemplateKWLoc,
const TemplateArgumentListInfo *TemplateArgs) {
bool RefersToCapturedVariable = isa<VarDecl, BindingDecl>(D) &&
NeedToCaptureVariable(D, NameInfo.getLoc());
DeclRefExpr *E = DeclRefExpr::Create(
Context, NNS, TemplateKWLoc, D, RefersToCapturedVariable, NameInfo, Ty,
VK, FoundD, TemplateArgs, getNonOdrUseReasonInCurrentContext(D));
MarkDeclRefReferenced(E);
// C++ [except.spec]p17:
// An exception-specification is considered to be needed when:
// - in an expression, the function is the unique lookup result or
// the selected member of a set of overloaded functions.
//
// We delay doing this until after we've built the function reference and
// marked it as used so that:
// a) if the function is defaulted, we get errors from defining it before /
// instead of errors from computing its exception specification, and
// b) if the function is a defaulted comparison, we can use the body we
// build when defining it as input to the exception specification
// computation rather than computing a new body.
if (const auto *FPT = Ty->getAs<FunctionProtoType>()) {
if (isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) {
if (const auto *NewFPT = ResolveExceptionSpec(NameInfo.getLoc(), FPT))
E->setType(Context.getQualifiedType(NewFPT, Ty.getQualifiers()));
}
}
if (getLangOpts().ObjCWeak && isa<VarDecl>(D) &&
Ty.getObjCLifetime() == Qualifiers::OCL_Weak && !isUnevaluatedContext() &&
!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak, E->getBeginLoc()))
getCurFunction()->recordUseOfWeak(E);
const auto *FD = dyn_cast<FieldDecl>(D);
if (const auto *IFD = dyn_cast<IndirectFieldDecl>(D))
FD = IFD->getAnonField();
if (FD) {
UnusedPrivateFields.remove(FD);
// Just in case we're building an illegal pointer-to-member.
if (FD->isBitField())
E->setObjectKind(OK_BitField);
}
// C++ [expr.prim]/8: The expression [...] is a bit-field if the identifier
// designates a bit-field.
if (const auto *BD = dyn_cast<BindingDecl>(D))
if (const auto *BE = BD->getBinding())
E->setObjectKind(BE->getObjectKind());
return E;
}
void
Sema::DecomposeUnqualifiedId(const UnqualifiedId &Id,
TemplateArgumentListInfo &Buffer,
DeclarationNameInfo &NameInfo,
const TemplateArgumentListInfo *&TemplateArgs) {
if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId) {
Buffer.setLAngleLoc(Id.TemplateId->LAngleLoc);
Buffer.setRAngleLoc(Id.TemplateId->RAngleLoc);
ASTTemplateArgsPtr TemplateArgsPtr(Id.TemplateId->getTemplateArgs(),
Id.TemplateId->NumArgs);
translateTemplateArguments(TemplateArgsPtr, Buffer);
TemplateName TName = Id.TemplateId->Template.get();
SourceLocation TNameLoc = Id.TemplateId->TemplateNameLoc;
NameInfo = Context.getNameForTemplate(TName, TNameLoc);
TemplateArgs = &Buffer;
} else {
NameInfo = GetNameFromUnqualifiedId(Id);
TemplateArgs = nullptr;
}
}
static void emitEmptyLookupTypoDiagnostic(
const TypoCorrection &TC, Sema &SemaRef, const CXXScopeSpec &SS,
DeclarationName Typo, SourceLocation TypoLoc, ArrayRef<Expr *> Args,
unsigned DiagnosticID, unsigned DiagnosticSuggestID) {
DeclContext *Ctx =
SS.isEmpty() ? nullptr : SemaRef.computeDeclContext(SS, false);
if (!TC) {
// Emit a special diagnostic for failed member lookups.
// FIXME: computing the declaration context might fail here (?)
if (Ctx)
SemaRef.Diag(TypoLoc, diag::err_no_member) << Typo << Ctx
<< SS.getRange();
else
SemaRef.Diag(TypoLoc, DiagnosticID) << Typo;
return;
}
std::string CorrectedStr = TC.getAsString(SemaRef.getLangOpts());
bool DroppedSpecifier =
TC.WillReplaceSpecifier() && Typo.getAsString() == CorrectedStr;
unsigned NoteID = TC.getCorrectionDeclAs<ImplicitParamDecl>()
? diag::note_implicit_param_decl
: diag::note_previous_decl;
if (!Ctx)
SemaRef.diagnoseTypo(TC, SemaRef.PDiag(DiagnosticSuggestID) << Typo,
SemaRef.PDiag(NoteID));
else
SemaRef.diagnoseTypo(TC, SemaRef.PDiag(diag::err_no_member_suggest)
<< Typo << Ctx << DroppedSpecifier
<< SS.getRange(),
SemaRef.PDiag(NoteID));
}
bool Sema::DiagnoseDependentMemberLookup(const LookupResult &R) {
// During a default argument instantiation the CurContext points
// to a CXXMethodDecl; but we can't apply a this-> fixit inside a
// function parameter list, so we add an explicit check.
bool isDefaultArgument =
!CodeSynthesisContexts.empty() &&
CodeSynthesisContexts.back().Kind ==
CodeSynthesisContext::DefaultFunctionArgumentInstantiation;
const auto *CurMethod = dyn_cast<CXXMethodDecl>(CurContext);
bool isInstance = CurMethod && CurMethod->isInstance() &&
R.getNamingClass() == CurMethod->getParent() &&
!isDefaultArgument;
// There are two ways we can find a class-scope declaration during template
// instantiation that we did not find in the template definition: if it is a
// member of a dependent base class, or if it is declared after the point of
// use in the same class. Distinguish these by comparing the class in which
// the member was found to the naming class of the lookup.
unsigned DiagID = diag::err_found_in_dependent_base;
unsigned NoteID = diag::note_member_declared_at;
if (R.getRepresentativeDecl()->getDeclContext()->Equals(R.getNamingClass())) {
DiagID = getLangOpts().MSVCCompat ? diag::ext_found_later_in_class
: diag::err_found_later_in_class;
} else if (getLangOpts().MSVCCompat) {
DiagID = diag::ext_found_in_dependent_base;
NoteID = diag::note_dependent_member_use;
}
if (isInstance) {
// Give a code modification hint to insert 'this->'.
Diag(R.getNameLoc(), DiagID)
<< R.getLookupName()
<< FixItHint::CreateInsertion(R.getNameLoc(), "this->");
CheckCXXThisCapture(R.getNameLoc());
} else {
// FIXME: Add a FixItHint to insert 'Base::' or 'Derived::' (assuming
// they're not shadowed).
Diag(R.getNameLoc(), DiagID) << R.getLookupName();
}
for (const NamedDecl *D : R)
Diag(D->getLocation(), NoteID);
// Return true if we are inside a default argument instantiation and the
// found name refers to an instance member function; otherwise the caller
// would try to create an implicit member call, which is wrong for default
// arguments.
//
// FIXME: Is this special case necessary? We could allow the caller to
// diagnose this.
if (isDefaultArgument && ((*R.begin())->isCXXInstanceMember())) {
Diag(R.getNameLoc(), diag::err_member_call_without_object) << 0;
return true;
}
// Tell the callee to try to recover.
return false;
}
bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R,
CorrectionCandidateCallback &CCC,
TemplateArgumentListInfo *ExplicitTemplateArgs,
ArrayRef<Expr *> Args, DeclContext *LookupCtx,
TypoExpr **Out) {
DeclarationName Name = R.getLookupName();
unsigned diagnostic = diag::err_undeclared_var_use;
unsigned diagnostic_suggest = diag::err_undeclared_var_use_suggest;
if (Name.getNameKind() == DeclarationName::CXXOperatorName ||
Name.getNameKind() == DeclarationName::CXXLiteralOperatorName ||
Name.getNameKind() == DeclarationName::CXXConversionFunctionName) {
diagnostic = diag::err_undeclared_use;
diagnostic_suggest = diag::err_undeclared_use_suggest;
}
// If the original lookup was an unqualified lookup, fake an
// unqualified lookup. This is useful when (for example) the
// original lookup would not have found something because it was a
// dependent name.
DeclContext *DC =
LookupCtx ? LookupCtx : (SS.isEmpty() ? CurContext : nullptr);
while (DC) {
if (isa<CXXRecordDecl>(DC)) {
LookupQualifiedName(R, DC);
if (!R.empty()) {
// Don't give errors about ambiguities in this lookup.
R.suppressDiagnostics();
// If there's a best viable function among the results, only mention
// that one in the notes.
OverloadCandidateSet Candidates(R.getNameLoc(),
OverloadCandidateSet::CSK_Normal);
AddOverloadedCallCandidates(R, ExplicitTemplateArgs, Args, Candidates);
OverloadCandidateSet::iterator Best;
if (Candidates.BestViableFunction(*this, R.getNameLoc(), Best) ==
OR_Success) {
R.clear();
R.addDecl(Best->FoundDecl.getDecl(), Best->FoundDecl.getAccess());
R.resolveKind();
}
return DiagnoseDependentMemberLookup(R);
}
R.clear();
}
DC = DC->getLookupParent();
}
// We didn't find anything, so try to correct for a typo.
TypoCorrection Corrected;
if (S && Out) {
SourceLocation TypoLoc = R.getNameLoc();
assert(!ExplicitTemplateArgs &&
"Diagnosing an empty lookup with explicit template args!");
*Out = CorrectTypoDelayed(
R.getLookupNameInfo(), R.getLookupKind(), S, &SS, CCC,
[=](const TypoCorrection &TC) {
emitEmptyLookupTypoDiagnostic(TC, *this, SS, Name, TypoLoc, Args,
diagnostic, diagnostic_suggest);
},
nullptr, CTK_ErrorRecovery, LookupCtx);
if (*Out)
return true;
} else if (S && (Corrected =
CorrectTypo(R.getLookupNameInfo(), R.getLookupKind(), S,
&SS, CCC, CTK_ErrorRecovery, LookupCtx))) {
std::string CorrectedStr(Corrected.getAsString(getLangOpts()));
bool DroppedSpecifier =
Corrected.WillReplaceSpecifier() && Name.getAsString() == CorrectedStr;
R.setLookupName(Corrected.getCorrection());
bool AcceptableWithRecovery = false;
bool AcceptableWithoutRecovery = false;
NamedDecl *ND = Corrected.getFoundDecl();
if (ND) {
if (Corrected.isOverloaded()) {
OverloadCandidateSet OCS(R.getNameLoc(),
OverloadCandidateSet::CSK_Normal);
OverloadCandidateSet::iterator Best;
for (NamedDecl *CD : Corrected) {
if (FunctionTemplateDecl *FTD =
dyn_cast<FunctionTemplateDecl>(CD))
AddTemplateOverloadCandidate(
FTD, DeclAccessPair::make(FTD, AS_none), ExplicitTemplateArgs,
Args, OCS);
else if (FunctionDecl *FD = dyn_cast<FunctionDecl>(CD))
if (!ExplicitTemplateArgs || ExplicitTemplateArgs->size() == 0)
AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none),
Args, OCS);
}
switch (OCS.BestViableFunction(*this, R.getNameLoc(), Best)) {
case OR_Success:
ND = Best->FoundDecl;
Corrected.setCorrectionDecl(ND);
break;
default:
// FIXME: Arbitrarily pick the first declaration for the note.
Corrected.setCorrectionDecl(ND);
break;
}
}
R.addDecl(ND);
if (getLangOpts().CPlusPlus && ND->isCXXClassMember()) {
CXXRecordDecl *Record = nullptr;
if (Corrected.getCorrectionSpecifier()) {
const Type *Ty = Corrected.getCorrectionSpecifier()->getAsType();
Record = Ty->getAsCXXRecordDecl();
}
if (!Record)
Record = cast<CXXRecordDecl>(
ND->getDeclContext()->getRedeclContext());
R.setNamingClass(Record);
}
auto *UnderlyingND = ND->getUnderlyingDecl();
AcceptableWithRecovery = isa<ValueDecl>(UnderlyingND) ||
isa<FunctionTemplateDecl>(UnderlyingND);
// FIXME: If we ended up with a typo for a type name or
// Objective-C class name, we're in trouble because the parser
// is in the wrong place to recover. Suggest the typo
// correction, but don't make it a fix-it since we're not going
// to recover well anyway.
AcceptableWithoutRecovery = isa<TypeDecl>(UnderlyingND) ||
getAsTypeTemplateDecl(UnderlyingND) ||
isa<ObjCInterfaceDecl>(UnderlyingND);
} else {
// FIXME: We found a keyword. Suggest it, but don't provide a fix-it
// because we aren't able to recover.
AcceptableWithoutRecovery = true;
}
if (AcceptableWithRecovery || AcceptableWithoutRecovery) {
unsigned NoteID = Corrected.getCorrectionDeclAs<ImplicitParamDecl>()
? diag::note_implicit_param_decl
: diag::note_previous_decl;
if (SS.isEmpty())
diagnoseTypo(Corrected, PDiag(diagnostic_suggest) << Name,
PDiag(NoteID), AcceptableWithRecovery);
else
diagnoseTypo(Corrected, PDiag(diag::err_no_member_suggest)
<< Name << computeDeclContext(SS, false)
<< DroppedSpecifier << SS.getRange(),
PDiag(NoteID), AcceptableWithRecovery);
// Tell the callee whether to try to recover.
return !AcceptableWithRecovery;
}
}
R.clear();
// Emit a special diagnostic for failed member lookups.
// FIXME: computing the declaration context might fail here (?)
if (!SS.isEmpty()) {
Diag(R.getNameLoc(), diag::err_no_member)
<< Name << computeDeclContext(SS, false)
<< SS.getRange();
return true;
}
// Give up, we can't recover.
Diag(R.getNameLoc(), diagnostic) << Name;
return true;
}
/// In Microsoft mode, if we are inside a template class whose parent class has
/// dependent base classes, and we can't resolve an unqualified identifier, then
/// assume the identifier is a member of a dependent base class. We can only
/// recover successfully in static methods, instance methods, and other contexts
/// where 'this' is available. This doesn't precisely match MSVC's
/// instantiation model, but it's close enough.
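///
/// For example (an illustrative sketch):
/// \code
///   template <typename T> struct S : T {
///     int f() { return n; } // 'n' is assumed to be a member of the base T
///   };
/// \endcode
/// Lookup of 'n' fails at definition time; we recover as if 'this->n' had
/// been written.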
static Expr *
recoverFromMSUnqualifiedLookup(Sema &S, ASTContext &Context,
DeclarationNameInfo &NameInfo,
SourceLocation TemplateKWLoc,
const TemplateArgumentListInfo *TemplateArgs) {
// Only try to recover from lookup into dependent bases in static methods or
// contexts where 'this' is available.
QualType ThisType = S.getCurrentThisType();
const CXXRecordDecl *RD = nullptr;
if (!ThisType.isNull())
RD = ThisType->getPointeeType()->getAsCXXRecordDecl();
else if (auto *MD = dyn_cast<CXXMethodDecl>(S.CurContext))
RD = MD->getParent();
if (!RD || !RD->hasDefinition() || !RD->hasAnyDependentBases())
return nullptr;
// Diagnose this as unqualified lookup into a dependent base class. If 'this'
// is available, suggest inserting 'this->' as a fixit.
SourceLocation Loc = NameInfo.getLoc();
auto DB = S.Diag(Loc, diag::ext_undeclared_unqual_id_with_dependent_base);
DB << NameInfo.getName() << RD;
if (!ThisType.isNull()) {
DB << FixItHint::CreateInsertion(Loc, "this->");
return CXXDependentScopeMemberExpr::Create(
Context, /*This=*/nullptr, ThisType, /*IsArrow=*/true,
/*Op=*/SourceLocation(), NestedNameSpecifierLoc(), TemplateKWLoc,
/*FirstQualifierFoundInScope=*/nullptr, NameInfo, TemplateArgs);
}
// Synthesize a fake NNS that points to the derived class. This will
// perform name lookup during template instantiation.
CXXScopeSpec SS;
auto *NNS =
NestedNameSpecifier::Create(Context, nullptr, true, RD->getTypeForDecl());
SS.MakeTrivial(Context, NNS, SourceRange(Loc, Loc));
return DependentScopeDeclRefExpr::Create(
Context, SS.getWithLocInContext(Context), TemplateKWLoc, NameInfo,
TemplateArgs);
}
ExprResult
Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS,
SourceLocation TemplateKWLoc, UnqualifiedId &Id,
bool HasTrailingLParen, bool IsAddressOfOperand,
CorrectionCandidateCallback *CCC,
bool IsInlineAsmIdentifier, Token *KeywordReplacement) {
assert(!(IsAddressOfOperand && HasTrailingLParen) &&
"cannot be direct & operand and have a trailing lparen");
if (SS.isInvalid())
return ExprError();
TemplateArgumentListInfo TemplateArgsBuffer;
// Decompose the UnqualifiedId into the following data.
DeclarationNameInfo NameInfo;
const TemplateArgumentListInfo *TemplateArgs;
DecomposeUnqualifiedId(Id, TemplateArgsBuffer, NameInfo, TemplateArgs);
DeclarationName Name = NameInfo.getName();
IdentifierInfo *II = Name.getAsIdentifierInfo();
SourceLocation NameLoc = NameInfo.getLoc();
if (II && II->isEditorPlaceholder()) {
// FIXME: When typed placeholders are supported we can create a typed
// placeholder expression node.
return ExprError();
}
// This specially handles arguments of attributes that appertain to the type
// of a C struct field, so that name lookup within the struct finds the
// member name, which is not the case in other contexts in C.
if (isAttrContext() && !getLangOpts().CPlusPlus && S->isClassScope()) {
// See if this is a reference to a field of a struct.
LookupResult R(*this, NameInfo, LookupMemberName);
// LookupName handles a name lookup from within an anonymous struct.
if (LookupName(R, S)) {
if (auto *VD = dyn_cast<ValueDecl>(R.getFoundDecl())) {
QualType type = VD->getType().getNonReferenceType();
// This will eventually be translated into a MemberExpr when the
// instantiated struct fields are used.
return BuildDeclRefExpr(VD, type, VK_LValue, NameLoc);
}
}
}
// Perform the required lookup.
LookupResult R(*this, NameInfo,
(Id.getKind() == UnqualifiedIdKind::IK_ImplicitSelfParam)
? LookupObjCImplicitSelfParam
: LookupOrdinaryName);
if (TemplateKWLoc.isValid() || TemplateArgs) {
// Lookup the template name again to correctly establish the context in
// which it was found. This is really unfortunate as we already did the
// lookup to determine that it was a template name in the first place. If
// this becomes a performance hit, we can work harder to preserve those
// results until we get here but it's likely not worth it.
AssumedTemplateKind AssumedTemplate;
if (LookupTemplateName(R, S, SS, /*ObjectType=*/QualType(),
/*EnteringContext=*/false, TemplateKWLoc,
&AssumedTemplate))
return ExprError();
if (R.wasNotFoundInCurrentInstantiation() || SS.isInvalid())
return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo,
IsAddressOfOperand, TemplateArgs);
} else {
bool IvarLookupFollowUp = II && !SS.isSet() && getCurMethodDecl();
LookupParsedName(R, S, &SS, /*ObjectType=*/QualType(),
/*AllowBuiltinCreation=*/!IvarLookupFollowUp);
// If the result might be in a dependent base class, this is a dependent
// id-expression.
if (R.wasNotFoundInCurrentInstantiation() || SS.isInvalid())
return ActOnDependentIdExpression(SS, TemplateKWLoc, NameInfo,
IsAddressOfOperand, TemplateArgs);
// If this reference is in an Objective-C method, then we need to do
// some special Objective-C lookup, too.
if (IvarLookupFollowUp) {
ExprResult E(ObjC().LookupInObjCMethod(R, S, II, true));
if (E.isInvalid())
return ExprError();
if (Expr *Ex = E.getAs<Expr>())
return Ex;
}
}
if (R.isAmbiguous())
return ExprError();
// This could be an implicitly declared function reference if the language
// mode allows it as a feature.
if (R.empty() && HasTrailingLParen && II &&
getLangOpts().implicitFunctionsAllowed()) {
NamedDecl *D = ImplicitlyDefineFunction(NameLoc, *II, S);
if (D) R.addDecl(D);
}
// Determine whether this name might be a candidate for
// argument-dependent lookup.
bool ADL = UseArgumentDependentLookup(SS, R, HasTrailingLParen);
if (R.empty() && !ADL) {
if (SS.isEmpty() && getLangOpts().MSVCCompat) {
if (Expr *E = recoverFromMSUnqualifiedLookup(*this, Context, NameInfo,
TemplateKWLoc, TemplateArgs))
return E;
}
// Don't diagnose an empty lookup for inline assembly.
if (IsInlineAsmIdentifier)
return ExprError();
// If this name wasn't predeclared and if this is not a function
// call, diagnose the problem.
TypoExpr *TE = nullptr;
DefaultFilterCCC DefaultValidator(II, SS.isValid() ? SS.getScopeRep()
: nullptr);
DefaultValidator.IsAddressOfOperand = IsAddressOfOperand;
assert((!CCC || CCC->IsAddressOfOperand == IsAddressOfOperand) &&
"Typo correction callback misconfigured");
if (CCC) {
// Make sure the callback knows what the typo being diagnosed is.
CCC->setTypoName(II);
if (SS.isValid())
CCC->setTypoNNS(SS.getScopeRep());
}
// FIXME: DiagnoseEmptyLookup produces bad diagnostics if we're looking for
// a template name, but if the name must be a template name we have always
// already looked it up by the time we get here.
if (DiagnoseEmptyLookup(S, SS, R, CCC ? *CCC : DefaultValidator, nullptr,
std::nullopt, nullptr, &TE)) {
if (TE && KeywordReplacement) {
auto &State = getTypoExprState(TE);
auto BestTC = State.Consumer->getNextCorrection();
if (BestTC.isKeyword()) {
auto *II = BestTC.getCorrectionAsIdentifierInfo();
if (State.DiagHandler)
State.DiagHandler(BestTC);
KeywordReplacement->startToken();
KeywordReplacement->setKind(II->getTokenID());
KeywordReplacement->setIdentifierInfo(II);
KeywordReplacement->setLocation(BestTC.getCorrectionRange().getBegin());
// Clean up the state associated with the TypoExpr, since it has
// now been diagnosed (without a call to CorrectDelayedTyposInExpr).
clearDelayedTypo(TE);
// Signal that a correction to a keyword was performed by returning a
// valid-but-null ExprResult.
return (Expr*)nullptr;
}
State.Consumer->resetCorrectionStream();
}
return TE ? TE : ExprError();
}
assert(!R.empty() &&
"DiagnoseEmptyLookup returned false but added no results");
// If we found an Objective-C instance variable, let
// LookupInObjCMethod build the appropriate expression to
// reference the ivar.
if (ObjCIvarDecl *Ivar = R.getAsSingle<ObjCIvarDecl>()) {
R.clear();
ExprResult E(ObjC().LookupInObjCMethod(R, S, Ivar->getIdentifier()));
// In hopelessly buggy code, Objective-C instance variable
// lookup fails and no expression will be built to reference it.
if (!E.isInvalid() && !E.get())
return ExprError();
return E;
}
}
// From this point on, the lookup either found something or ADL applies.
assert(!R.empty() || ADL);
// Check whether this might be a C++ implicit instance member access.
// C++ [class.mfct.non-static]p3:
// When an id-expression that is not part of a class member access
// syntax and not used to form a pointer to member is used in the
// body of a non-static member function of class X, if name lookup
// resolves the name in the id-expression to a non-static non-type
// member of some class C, the id-expression is transformed into a
// class member access expression using (*this) as the
// postfix-expression to the left of the . operator.
//
// But we don't actually need to do this for '&' operands if R
// resolved to a function or overloaded function set, because the
// expression is ill-formed if it actually works out to be a
// non-static member function:
//
// C++ [expr.ref]p4:
// Otherwise, if E1.E2 refers to a non-static member function. . .
// [t]he expression can be used only as the left-hand operand of a
// member function call.
//
// There are other safeguards against such uses, but it's important
// to get this right here so that we don't end up making a
// spuriously dependent expression if we're inside a dependent
// instance method.
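//
// For example (illustrative): in
//   struct X { int m; void g() { m = 1; } };
// the id-expression 'm' is transformed into '(*this).m' here.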
if (isPotentialImplicitMemberAccess(SS, R, IsAddressOfOperand))
return BuildPossibleImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs,
S);
if (TemplateArgs || TemplateKWLoc.isValid()) {
// In C++1y, if this is a variable template id, then check it
// in BuildTemplateIdExpr().
// The single lookup result must be a variable template declaration.
if (Id.getKind() == UnqualifiedIdKind::IK_TemplateId && Id.TemplateId &&
Id.TemplateId->Kind == TNK_Var_template) {
assert(R.getAsSingle<VarTemplateDecl>() &&
"There should only be one declaration found.");
}
return BuildTemplateIdExpr(SS, TemplateKWLoc, R, ADL, TemplateArgs);
}
return BuildDeclarationNameExpr(SS, R, ADL);
}
ExprResult Sema::BuildQualifiedDeclarationNameExpr(
CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo,
bool IsAddressOfOperand, TypeSourceInfo **RecoveryTSI) {
LookupResult R(*this, NameInfo, LookupOrdinaryName);
LookupParsedName(R, /*S=*/nullptr, &SS, /*ObjectType=*/QualType());
if (R.isAmbiguous())
return ExprError();
if (R.wasNotFoundInCurrentInstantiation() || SS.isInvalid())
return BuildDependentDeclRefExpr(SS, /*TemplateKWLoc=*/SourceLocation(),
NameInfo, /*TemplateArgs=*/nullptr);
if (R.empty()) {
// Don't diagnose problems with an invalid record decl; the secondary
// no_member diagnostic during template instantiation is likely bogus, e.g.
// if a class is invalid because it's derived from an invalid base class,
// then missing members were likely supposed to be inherited.
DeclContext *DC = computeDeclContext(SS);
if (const auto *CD = dyn_cast<CXXRecordDecl>(DC))
if (CD->isInvalidDecl())
return ExprError();
Diag(NameInfo.getLoc(), diag::err_no_member)
<< NameInfo.getName() << DC << SS.getRange();
return ExprError();
}
if (const TypeDecl *TD = R.getAsSingle<TypeDecl>()) {
// Diagnose a missing typename if this resolved unambiguously to a type in
// a dependent context. If we can recover with a type, downgrade this to
// a warning in Microsoft compatibility mode.
unsigned DiagID = diag::err_typename_missing;
if (RecoveryTSI && getLangOpts().MSVCCompat)
DiagID = diag::ext_typename_missing;
SourceLocation Loc = SS.getBeginLoc();
auto D = Diag(Loc, DiagID);
D << SS.getScopeRep() << NameInfo.getName().getAsString()
<< SourceRange(Loc, NameInfo.getEndLoc());
// Don't recover if the caller isn't expecting us to or if we're in a SFINAE
// context.
if (!RecoveryTSI)
return ExprError();
// Only issue the fixit if we're prepared to recover.
D << FixItHint::CreateInsertion(Loc, "typename ");
// Recover by pretending this was an elaborated type.
QualType Ty = Context.getTypeDeclType(TD);
TypeLocBuilder TLB;
TLB.pushTypeSpec(Ty).setNameLoc(NameInfo.getLoc());
QualType ET = getElaboratedType(ElaboratedTypeKeyword::None, SS, Ty);
ElaboratedTypeLoc QTL = TLB.push<ElaboratedTypeLoc>(ET);
QTL.setElaboratedKeywordLoc(SourceLocation());
QTL.setQualifierLoc(SS.getWithLocInContext(Context));
*RecoveryTSI = TLB.getTypeSourceInfo(Context, ET);
return ExprEmpty();
}
// If necessary, build an implicit class member access.
if (isPotentialImplicitMemberAccess(SS, R, IsAddressOfOperand))
return BuildPossibleImplicitMemberExpr(SS,
/*TemplateKWLoc=*/SourceLocation(),
R, /*TemplateArgs=*/nullptr,
/*S=*/nullptr);
return BuildDeclarationNameExpr(SS, R, /*ADL=*/false);
}
ExprResult
Sema::PerformObjectMemberConversion(Expr *From,
NestedNameSpecifier *Qualifier,
NamedDecl *FoundDecl,
NamedDecl *Member) {
const auto *RD = dyn_cast<CXXRecordDecl>(Member->getDeclContext());
if (!RD)
return From;
QualType DestRecordType;
QualType DestType;
QualType FromRecordType;
QualType FromType = From->getType();
bool PointerConversions = false;
if (isa<FieldDecl>(Member)) {
DestRecordType = Context.getCanonicalType(Context.getTypeDeclType(RD));
auto FromPtrType = FromType->getAs<PointerType>();
DestRecordType = Context.getAddrSpaceQualType(
DestRecordType, FromPtrType
? FromType->getPointeeType().getAddressSpace()
: FromType.getAddressSpace());
if (FromPtrType) {
DestType = Context.getPointerType(DestRecordType);
FromRecordType = FromPtrType->getPointeeType();
PointerConversions = true;
} else {
DestType = DestRecordType;
FromRecordType = FromType;
}
} else if (const auto *Method = dyn_cast<CXXMethodDecl>(Member)) {
if (!Method->isImplicitObjectMemberFunction())
return From;
DestType = Method->getThisType().getNonReferenceType();
DestRecordType = Method->getFunctionObjectParameterType();
if (FromType->getAs<PointerType>()) {
FromRecordType = FromType->getPointeeType();
PointerConversions = true;
} else {
FromRecordType = FromType;
DestType = DestRecordType;
}
LangAS FromAS = FromRecordType.getAddressSpace();
LangAS DestAS = DestRecordType.getAddressSpace();
if (FromAS != DestAS) {
QualType FromRecordTypeWithoutAS =
Context.removeAddrSpaceQualType(FromRecordType);
QualType FromTypeWithDestAS =
Context.getAddrSpaceQualType(FromRecordTypeWithoutAS, DestAS);
if (PointerConversions)
FromTypeWithDestAS = Context.getPointerType(FromTypeWithDestAS);
From = ImpCastExprToType(From, FromTypeWithDestAS,
CK_AddressSpaceConversion, From->getValueKind())
.get();
}
} else {
// No conversion necessary.
return From;
}
if (DestType->isDependentType() || FromType->isDependentType())
return From;
// If the unqualified types are the same, no conversion is necessary.
if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType))
return From;
SourceRange FromRange = From->getSourceRange();
SourceLocation FromLoc = FromRange.getBegin();
ExprValueKind VK = From->getValueKind();
// C++ [class.member.lookup]p8:
// [...] Ambiguities can often be resolved by qualifying a name with its
// class name.
//
// If the member was a qualified name and the qualifier referred to a
// specific base subobject type, we'll cast to that intermediate type
// first and then to the object in which the member is declared. That allows
// one to resolve ambiguities in, e.g., a diamond-shaped hierarchy such as:
//
// class Base { public: int x; };
// class Derived1 : public Base { };
// class Derived2 : public Base { };
// class VeryDerived : public Derived1, public Derived2 { void f(); };
//
// void VeryDerived::f() {
// x = 17; // error: ambiguous base subobjects
// Derived1::x = 17; // okay, pick the Base subobject of Derived1
// }
if (Qualifier && Qualifier->getAsType()) {
QualType QType = QualType(Qualifier->getAsType(), 0);
assert(QType->isRecordType() && "lookup done with non-record type");
QualType QRecordType = QualType(QType->castAs<RecordType>(), 0);
// In C++98, the qualifier type doesn't actually have to be a base
// type of the object type, in which case we just ignore it.
// Otherwise build the appropriate casts.
if (IsDerivedFrom(FromLoc, FromRecordType, QRecordType)) {
CXXCastPath BasePath;
if (CheckDerivedToBaseConversion(FromRecordType, QRecordType,
FromLoc, FromRange, &BasePath))
return ExprError();
if (PointerConversions)
QType = Context.getPointerType(QType);
From = ImpCastExprToType(From, QType, CK_UncheckedDerivedToBase,
VK, &BasePath).get();
FromType = QType;
FromRecordType = QRecordType;
// If the qualifier type was the same as the destination type,
// we're done.
if (Context.hasSameUnqualifiedType(FromRecordType, DestRecordType))
return From;
}
}
CXXCastPath BasePath;
if (CheckDerivedToBaseConversion(FromRecordType, DestRecordType,
FromLoc, FromRange, &BasePath,
/*IgnoreAccess=*/true))
return ExprError();
return ImpCastExprToType(From, DestType, CK_UncheckedDerivedToBase,
VK, &BasePath);
}
bool Sema::UseArgumentDependentLookup(const CXXScopeSpec &SS,
const LookupResult &R,
bool HasTrailingLParen) {
// Only when used directly as the postfix-expression of a call.
if (!HasTrailingLParen)
return false;
// Never if a scope specifier was provided.
if (SS.isNotEmpty())
return false;
// Only in C++ or ObjC++.
if (!getLangOpts().CPlusPlus)
return false;
// Turn off ADL when we find certain kinds of declarations during
// normal lookup:
for (const NamedDecl *D : R) {
// C++0x [basic.lookup.argdep]p3:
// -- a declaration of a class member
// Since using decls preserve this property, we check this on the
// original decl.
if (D->isCXXClassMember())
return false;
// C++0x [basic.lookup.argdep]p3:
// -- a block-scope function declaration that is not a
// using-declaration
// NOTE: we also trigger this for function templates (in fact, we
// don't check the decl type at all, since all other decl types
// turn off ADL anyway).
if (isa<UsingShadowDecl>(D))
D = cast<UsingShadowDecl>(D)->getTargetDecl();
else if (D->getLexicalDeclContext()->isFunctionOrMethod())
return false;
// C++0x [basic.lookup.argdep]p3:
// -- a declaration that is neither a function nor a function
// template
// And also for builtin functions.
if (const auto *FDecl = dyn_cast<FunctionDecl>(D)) {
if (FDecl->getBuiltinID() && FDecl->isImplicit())
return false;
} else if (!isa<FunctionTemplateDecl>(D))
return false;
}
return true;
}
/// Diagnoses obvious problems with the use of the given declaration
/// as an expression. This is only actually called for lookups that
/// were not overloaded, and it doesn't promise that the declaration
/// will in fact be used.
static bool CheckDeclInExpr(Sema &S, SourceLocation Loc, NamedDecl *D,
bool AcceptInvalid) {
if (D->isInvalidDecl() && !AcceptInvalid)
return true;
if (isa<TypedefNameDecl>(D)) {
S.Diag(Loc, diag::err_unexpected_typedef) << D->getDeclName();
return true;
}
if (isa<ObjCInterfaceDecl>(D)) {
S.Diag(Loc, diag::err_unexpected_interface) << D->getDeclName();
return true;
}
if (isa<NamespaceDecl>(D)) {
S.Diag(Loc, diag::err_unexpected_namespace) << D->getDeclName();
return true;
}
return false;
}
// Certain kinds of multiversioned functions should be treated as overloaded
// even when there is only one result.
static bool ShouldLookupResultBeMultiVersionOverload(const LookupResult &R) {
assert(R.isSingleResult() && "Expected only a single result");
const auto *FD = dyn_cast<FunctionDecl>(R.getFoundDecl());
return FD &&
(FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion());
}
ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS,
LookupResult &R, bool NeedsADL,
bool AcceptInvalidDecl) {
// If this is a single, fully-resolved result and we don't need ADL,
// just build an ordinary singleton decl ref.
if (!NeedsADL && R.isSingleResult() &&
!R.getAsSingle<FunctionTemplateDecl>() &&
!ShouldLookupResultBeMultiVersionOverload(R))
return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), R.getFoundDecl(),
R.getRepresentativeDecl(), nullptr,
AcceptInvalidDecl);
// We only need to check the declaration if there's exactly one
// result, because in the overloaded case the results can only be
// functions and function templates.
if (R.isSingleResult() && !ShouldLookupResultBeMultiVersionOverload(R) &&
CheckDeclInExpr(*this, R.getNameLoc(), R.getFoundDecl(),
AcceptInvalidDecl))
return ExprError();
// Otherwise, just build an unresolved lookup expression. Suppress
// any lookup-related diagnostics; we'll hash these out later, when
// we've picked a target.
R.suppressDiagnostics();
UnresolvedLookupExpr *ULE = UnresolvedLookupExpr::Create(
Context, R.getNamingClass(), SS.getWithLocInContext(Context),
R.getLookupNameInfo(), NeedsADL, R.begin(), R.end(),
/*KnownDependent=*/false, /*KnownInstantiationDependent=*/false);
return ULE;
}
static void diagnoseUncapturableValueReferenceOrBinding(Sema &S,
SourceLocation loc,
ValueDecl *var);
ExprResult Sema::BuildDeclarationNameExpr(
const CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, NamedDecl *D,
NamedDecl *FoundD, const TemplateArgumentListInfo *TemplateArgs,
bool AcceptInvalidDecl) {
assert(D && "Cannot refer to a NULL declaration");
assert(!isa<FunctionTemplateDecl>(D) &&
"Cannot refer unambiguously to a function template");
SourceLocation Loc = NameInfo.getLoc();
if (CheckDeclInExpr(*this, Loc, D, AcceptInvalidDecl)) {
// Recovery from invalid cases (e.g. D is an invalid Decl).
// We use the dependent type for the RecoveryExpr to prevent bogus follow-up
// diagnostics, as invalid decls use int as a fallback type.
return CreateRecoveryExpr(NameInfo.getBeginLoc(), NameInfo.getEndLoc(), {});
}
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D)) {
// Specifically diagnose references to class templates that are missing
// a template argument list.
diagnoseMissingTemplateArguments(SS, /*TemplateKeyword=*/false, TD, Loc);
return ExprError();
}
// Make sure that we're referring to a value.
if (!isa<ValueDecl, UnresolvedUsingIfExistsDecl>(D)) {
Diag(Loc, diag::err_ref_non_value) << D << SS.getRange();
Diag(D->getLocation(), diag::note_declared_at);
return ExprError();
}
// Check whether this declaration can be used. Note that we suppress
// this check when we're going to perform argument-dependent lookup
// on this function name, because this might not be the function
// that overload resolution actually selects.
if (DiagnoseUseOfDecl(D, Loc))
return ExprError();
auto *VD = cast<ValueDecl>(D);
// Only create DeclRefExpr's for valid Decl's.
if (VD->isInvalidDecl() && !AcceptInvalidDecl)
return ExprError();
// Handle members of anonymous structs and unions. If we got here,
// and the reference is to a class member indirect field, then this
// must be the subject of a pointer-to-member expression.
if (auto *IndirectField = dyn_cast<IndirectFieldDecl>(VD);
IndirectField && !IndirectField->isCXXClassMember())
return BuildAnonymousStructUnionMemberReference(SS, NameInfo.getLoc(),
IndirectField);
QualType type = VD->getType();
if (type.isNull())
return ExprError();
ExprValueKind valueKind = VK_PRValue;
// In 'T ...V;', the type of the declaration 'V' is 'T...', but the type of
// a reference to 'V' is simply (unexpanded) 'T'. The type, like the value,
// is expanded by some outer '...' in the context of the use.
type = type.getNonPackExpansionType();
switch (D->getKind()) {
// Ignore all the non-ValueDecl kinds.
#define ABSTRACT_DECL(kind)
#define VALUE(type, base)
#define DECL(type, base) case Decl::type:
#include "clang/AST/DeclNodes.inc"
llvm_unreachable("invalid value decl kind");
// These shouldn't make it here.
case Decl::ObjCAtDefsField:
llvm_unreachable("forming non-member reference to ivar?");
// Enum constants are always r-values and never references.
// Unresolved using declarations are dependent.
case Decl::EnumConstant:
case Decl::UnresolvedUsingValue:
case Decl::OMPDeclareReduction:
case Decl::OMPDeclareMapper:
valueKind = VK_PRValue;
break;
// Fields and indirect fields that got here must be for
// pointer-to-member expressions; we just call them l-values for
// internal consistency, because this subexpression doesn't really
// exist in the high-level semantics.
case Decl::Field:
case Decl::IndirectField:
case Decl::ObjCIvar:
assert((getLangOpts().CPlusPlus || isAttrContext()) &&
"building reference to field in C?");
// These can't have reference type in well-formed programs, but
// for internal consistency we do this anyway.
type = type.getNonReferenceType();
valueKind = VK_LValue;
break;
// Non-type template parameters are either l-values or r-values
// depending on the type.
case Decl::NonTypeTemplateParm: {
if (const ReferenceType *reftype = type->getAs<ReferenceType>()) {
type = reftype->getPointeeType();
valueKind = VK_LValue; // even if the parameter is an r-value reference
break;
}
// [expr.prim.id.unqual]p2:
// If the entity is a template parameter object for a template
// parameter of type T, the type of the expression is const T.
// [...] The expression is an lvalue if the entity is a [...] template
// parameter object.
if (type->isRecordType()) {
type = type.getUnqualifiedType().withConst();
valueKind = VK_LValue;
break;
}
// For non-references, we need to strip qualifiers just in case
// the template parameter was declared as 'const int' or whatever.
valueKind = VK_PRValue;
type = type.getUnqualifiedType();
break;
}
case Decl::Var:
case Decl::VarTemplateSpecialization:
case Decl::VarTemplatePartialSpecialization:
case Decl::Decomposition:
case Decl::OMPCapturedExpr:
// In C, "extern void blah;" is valid and is an r-value.
if (!getLangOpts().CPlusPlus && !type.hasQualifiers() &&
type->isVoidType()) {
valueKind = VK_PRValue;
break;
}
[[fallthrough]];
case Decl::ImplicitParam:
case Decl::ParmVar: {
// These are always l-values.
valueKind = VK_LValue;
type = type.getNonReferenceType();
// FIXME: Does the addition of const really only apply in
// potentially-evaluated contexts? Since the variable isn't actually
// captured in an unevaluated context, it seems that the answer is no.
if (!isUnevaluatedContext()) {
QualType CapturedType = getCapturedDeclRefType(cast<VarDecl>(VD), Loc);
if (!CapturedType.isNull())
type = CapturedType;
}
break;
}
case Decl::Binding:
// These are always lvalues.
valueKind = VK_LValue;
type = type.getNonReferenceType();
break;
case Decl::Function: {
if (unsigned BID = cast<FunctionDecl>(VD)->getBuiltinID()) {
if (!Context.BuiltinInfo.isDirectlyAddressable(BID)) {
type = Context.BuiltinFnTy;
valueKind = VK_PRValue;
break;
}
}
const FunctionType *fty = type->castAs<FunctionType>();
// If we're referring to a function with an __unknown_anytype
// result type, make the entire expression __unknown_anytype.
if (fty->getReturnType() == Context.UnknownAnyTy) {
type = Context.UnknownAnyTy;
valueKind = VK_PRValue;
break;
}
// Functions are l-values in C++.
if (getLangOpts().CPlusPlus) {
valueKind = VK_LValue;
break;
}
// C99 DR 316 says that, if a function type comes from a
// function definition (without a prototype), that type is only
// used for checking compatibility. Therefore, when referencing
// the function, we pretend that we don't have the full function
// type.
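//
// For example (an illustrative sketch):
//   int f(x) int x; { return x; } // K&R definition; 'int (int)' is recorded
//                                 // only for compatibility checking
//   int (*FP)() = &f;             // the reference to 'f' has type 'int ()'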
if (!cast<FunctionDecl>(VD)->hasPrototype() && isa<FunctionProtoType>(fty))
type = Context.getFunctionNoProtoType(fty->getReturnType(),
fty->getExtInfo());
// Functions are r-values in C.
valueKind = VK_PRValue;
break;
}
case Decl::CXXDeductionGuide:
llvm_unreachable("building reference to deduction guide");
case Decl::MSProperty:
case Decl::MSGuid:
case Decl::TemplateParamObject:
// FIXME: Should MSGuidDecl and template parameter objects be subject to
// capture in OpenMP, or duplicated between host and device?
valueKind = VK_LValue;
break;
case Decl::UnnamedGlobalConstant:
valueKind = VK_LValue;
break;
case Decl::CXXMethod:
// If we're referring to a method with an __unknown_anytype
// result type, make the entire expression __unknown_anytype.
// This should only be possible with a type written directly.
if (const FunctionProtoType *proto =
dyn_cast<FunctionProtoType>(VD->getType()))
if (proto->getReturnType() == Context.UnknownAnyTy) {
type = Context.UnknownAnyTy;
valueKind = VK_PRValue;
break;
}
// C++ methods are l-values if static, r-values if non-static.
if (cast<CXXMethodDecl>(VD)->isStatic()) {
valueKind = VK_LValue;
break;
}
[[fallthrough]];
case Decl::CXXConversion:
case Decl::CXXDestructor:
case Decl::CXXConstructor:
valueKind = VK_PRValue;
break;
}
auto *E =
BuildDeclRefExpr(VD, type, valueKind, NameInfo, &SS, FoundD,
/*FIXME: TemplateKWLoc*/ SourceLocation(), TemplateArgs);
// Clang AST consumers assume a DeclRefExpr refers to a valid decl. We
// wrap a DeclRefExpr referring to an invalid decl with a dependent-type
// RecoveryExpr to avoid follow-up semantic analysis (thus prevent bogus
// diagnostics).
if (VD->isInvalidDecl() && E)
return CreateRecoveryExpr(E->getBeginLoc(), E->getEndLoc(), {E});
return E;
}
static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source,
SmallString<32> &Target) {
Target.resize(CharByteWidth * (Source.size() + 1));
char *ResultPtr = &Target[0];
const llvm::UTF8 *ErrorPtr;
bool success =
llvm::ConvertUTF8toWide(CharByteWidth, Source, ResultPtr, ErrorPtr);
(void)success;
assert(success);
Target.resize(ResultPtr - &Target[0]);
}
ExprResult Sema::BuildPredefinedExpr(SourceLocation Loc,
PredefinedIdentKind IK) {
Decl *currentDecl = getPredefinedExprDecl(CurContext);
if (!currentDecl) {
Diag(Loc, diag::ext_predef_outside_function);
currentDecl = Context.getTranslationUnitDecl();
}
QualType ResTy;
StringLiteral *SL = nullptr;
if (cast<DeclContext>(currentDecl)->isDependentContext())
ResTy = Context.DependentTy;
else {
// Pre-defined identifiers are of type char[x], where x is the length of
// the string.
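// For example, inside 'void foo()', __func__ has type char[4]: the
// three characters of "foo" plus the null terminator.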
bool ForceElaboratedPrinting =
IK == PredefinedIdentKind::Function && getLangOpts().MSVCCompat;
auto Str =
PredefinedExpr::ComputeName(IK, currentDecl, ForceElaboratedPrinting);
unsigned Length = Str.length();
llvm::APInt LengthI(32, Length + 1);
if (IK == PredefinedIdentKind::LFunction ||
IK == PredefinedIdentKind::LFuncSig) {
ResTy =
Context.adjustStringLiteralBaseType(Context.WideCharTy.withConst());
SmallString<32> RawChars;
ConvertUTF8ToWideString(Context.getTypeSizeInChars(ResTy).getQuantity(),
Str, RawChars);
ResTy = Context.getConstantArrayType(ResTy, LengthI, nullptr,
ArraySizeModifier::Normal,
/*IndexTypeQuals*/ 0);
SL = StringLiteral::Create(Context, RawChars, StringLiteralKind::Wide,
/*Pascal*/ false, ResTy, Loc);
} else {
ResTy = Context.adjustStringLiteralBaseType(Context.CharTy.withConst());
ResTy = Context.getConstantArrayType(ResTy, LengthI, nullptr,
ArraySizeModifier::Normal,
/*IndexTypeQuals*/ 0);
SL = StringLiteral::Create(Context, Str, StringLiteralKind::Ordinary,
/*Pascal*/ false, ResTy, Loc);
}
}
return PredefinedExpr::Create(Context, Loc, ResTy, IK, LangOpts.MicrosoftExt,
SL);
}
ExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind) {
return BuildPredefinedExpr(Loc, getPredefinedExprKind(Kind));
}
ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) {
SmallString<16> CharBuffer;
bool Invalid = false;
StringRef ThisTok = PP.getSpelling(Tok, CharBuffer, &Invalid);
if (Invalid)
return ExprError();
CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), Tok.getLocation(),
PP, Tok.getKind());
if (Literal.hadError())
return ExprError();
QualType Ty;
if (Literal.isWide())
Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++.
else if (Literal.isUTF8() && getLangOpts().C23)
Ty = Context.UnsignedCharTy; // u8'x' -> unsigned char in C23
else if (Literal.isUTF8() && getLangOpts().Char8)
Ty = Context.Char8Ty; // u8'x' -> char8_t when it exists.
else if (Literal.isUTF16())
Ty = Context.Char16Ty; // u'x' -> char16_t in C11 and C++11.
else if (Literal.isUTF32())
Ty = Context.Char32Ty; // U'x' -> char32_t in C11 and C++11.
else if (!getLangOpts().CPlusPlus || Literal.isMultiChar())
Ty = Context.IntTy; // 'x' -> int in C, 'wxyz' -> int in C++.
else
Ty = Context.CharTy; // 'x' -> char in C++;
// u8'x' -> char in C11-C17 and in C++ without char8_t.
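// For example, with -std=c23, u8'a' has type unsigned char; with
// -std=c++17 (no char8_t) it has type char; and 'ab' is a multi-character
// constant of type int in both C and C++.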
CharacterLiteralKind Kind = CharacterLiteralKind::Ascii;
if (Literal.isWide())
Kind = CharacterLiteralKind::Wide;
else if (Literal.isUTF16())
Kind = CharacterLiteralKind::UTF16;
else if (Literal.isUTF32())
Kind = CharacterLiteralKind::UTF32;
else if (Literal.isUTF8())
Kind = CharacterLiteralKind::UTF8;
Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty,
Tok.getLocation());
if (Literal.getUDSuffix().empty())
return Lit;
// We're building a user-defined literal.
IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix());
SourceLocation UDSuffixLoc =
getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset());
// Make sure we're allowed user-defined literals here.
if (!UDLScope)
return ExprError(Diag(UDSuffixLoc, diag::err_invalid_character_udl));
// C++11 [lex.ext]p6: The literal L is treated as a call of the form
// operator "" X (ch)
return BuildCookedLiteralOperatorCall(*this, UDLScope, UDSuffix, UDSuffixLoc,
Lit, Tok.getLocation());
}
ExprResult Sema::ActOnIntegerConstant(SourceLocation Loc, uint64_t Val) {
unsigned IntSize = Context.getTargetInfo().getIntWidth();
return IntegerLiteral::Create(Context, llvm::APInt(IntSize, Val),
Context.IntTy, Loc);
}
static Expr *BuildFloatingLiteral(Sema &S, NumericLiteralParser &Literal,
QualType Ty, SourceLocation Loc) {
const llvm::fltSemantics &Format = S.Context.getFloatTypeSemantics(Ty);
using llvm::APFloat;
APFloat Val(Format);
llvm::RoundingMode RM = S.CurFPFeatures.getRoundingMode();
if (RM == llvm::RoundingMode::Dynamic)
RM = llvm::RoundingMode::NearestTiesToEven;
APFloat::opStatus result = Literal.GetFloatValue(Val, RM);
// Overflow is always an error, but underflow is only an error if
// we underflowed to zero (APFloat reports denormals as underflow).
if ((result & APFloat::opOverflow) ||
((result & APFloat::opUnderflow) && Val.isZero())) {
unsigned diagnostic;
SmallString<20> buffer;
if (result & APFloat::opOverflow) {
diagnostic = diag::warn_float_overflow;
APFloat::getLargest(Format).toString(buffer);
} else {
diagnostic = diag::warn_float_underflow;
APFloat::getSmallest(Format).toString(buffer);
}
S.Diag(Loc, diagnostic) << Ty << buffer.str();
}
bool isExact = (result == APFloat::opOK);
return FloatingLiteral::Create(S.Context, Val, isExact, Ty, Loc);
}
bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc, bool AllowZero) {
assert(E && "Invalid expression");
if (E->isValueDependent())
return false;
QualType QT = E->getType();
if (!QT->isIntegerType() || QT->isBooleanType() || QT->isCharType()) {
Diag(E->getExprLoc(), diag::err_pragma_loop_invalid_argument_type) << QT;
return true;
}
llvm::APSInt ValueAPS;
ExprResult R = VerifyIntegerConstantExpression(E, &ValueAPS);
if (R.isInvalid())
return true;
// GCC allows the value of unroll count to be 0.
// https://gcc.gnu.org/onlinedocs/gcc/Loop-Specific-Pragmas.html says
// "The values of 0 and 1 block any unrolling of the loop."
// The value doesn't have to be strictly positive in the '#pragma GCC unroll'
// and '#pragma unroll' cases.
bool ValueIsPositive =
AllowZero ? ValueAPS.isNonNegative() : ValueAPS.isStrictlyPositive();
if (!ValueIsPositive || ValueAPS.getActiveBits() > 31) {
Diag(E->getExprLoc(), diag::err_requires_positive_value)
<< toString(ValueAPS, 10) << ValueIsPositive;
return true;
}
return false;
}
ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
// Fast path for a single digit (which is quite common). A single digit
// cannot have a trigraph, escaped newline, radix prefix, or suffix.
if (Tok.getLength() == 1 || Tok.getKind() == tok::binary_data) {
const char Val = PP.getSpellingOfSingleCharacterNumericConstant(Tok);
return ActOnIntegerConstant(Tok.getLocation(), Val);
}
SmallString<128> SpellingBuffer;
// NumericLiteralParser wants to overread by one character. Add padding to
// the buffer in case the token is copied to the buffer. If getSpelling()
// returns a StringRef to the memory buffer, it should have a null char at
// the EOF, so it is also safe.
SpellingBuffer.resize(Tok.getLength() + 1);
// Get the spelling of the token, which eliminates trigraphs, etc.
bool Invalid = false;
StringRef TokSpelling = PP.getSpelling(Tok, SpellingBuffer, &Invalid);
if (Invalid)
return ExprError();
NumericLiteralParser Literal(TokSpelling, Tok.getLocation(),
PP.getSourceManager(), PP.getLangOpts(),
PP.getTargetInfo(), PP.getDiagnostics());
if (Literal.hadError)
return ExprError();
if (Literal.hasUDSuffix()) {
// We're building a user-defined literal.
const IdentifierInfo *UDSuffix = &Context.Idents.get(Literal.getUDSuffix());
SourceLocation UDSuffixLoc =
getUDSuffixLoc(*this, Tok.getLocation(), Literal.getUDSuffixOffset());
// Make sure we're allowed user-defined literals here.
if (!UDLScope)
return ExprError(Diag(UDSuffixLoc, diag::err_invalid_numeric_udl));
QualType CookedTy;
if (Literal.isFloatingLiteral()) {
// C++11 [lex.ext]p4: If S contains a literal operator with parameter type
// long double, the literal is treated as a call of the form
// operator "" X (f L)
CookedTy = Context.LongDoubleTy;
} else {
// C++11 [lex.ext]p3: If S contains a literal operator with parameter type
// unsigned long long, the literal is treated as a call of the form
// operator "" X (n ULL)
CookedTy = Context.UnsignedLongLongTy;
}
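// For example, with a hypothetical suffix _km, 123_km is built as
// operator""_km(123ULL) and 1.5_km as operator""_km(1.5L) when a matching
// cooked operator is found.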
DeclarationName OpName =
Context.DeclarationNames.getCXXLiteralOperatorName(UDSuffix);
DeclarationNameInfo OpNameInfo(OpName, UDSuffixLoc);
OpNameInfo.setCXXLiteralOperatorNameLoc(UDSuffixLoc);
SourceLocation TokLoc = Tok.getLocation();
// Perform literal operator lookup to determine if we're building a raw
// literal or a cooked one.
LookupResult R(*this, OpName, UDSuffixLoc, LookupOrdinaryName);
switch (LookupLiteralOperator(UDLScope, R, CookedTy,
/*AllowRaw*/ true, /*AllowTemplate*/ true,
/*AllowStringTemplatePack*/ false,
/*DiagnoseMissing*/ !Literal.isImaginary)) {
case LOLR_ErrorNoDiagnostic:
// Lookup failure for imaginary constants isn't fatal, there's still the
// GNU extension producing _Complex types.
break;
case LOLR_Error:
return ExprError();
case LOLR_Cooked: {
Expr *Lit;
if (Literal.isFloatingLiteral()) {
Lit = BuildFloatingLiteral(*this, Literal, CookedTy, Tok.getLocation());
} else {
llvm::APInt ResultVal(Context.getTargetInfo().getLongLongWidth(), 0);
if (Literal.GetIntegerValue(ResultVal))
Diag(Tok.getLocation(), diag::err_integer_literal_too_large)
<< /* Unsigned */ 1;
Lit = IntegerLiteral::Create(Context, ResultVal, CookedTy,
Tok.getLocation());
}
return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc);
}
case LOLR_Raw: {
// C++11 [lex.ext]p3, p4: If S contains a raw literal operator, the
// literal is treated as a call of the form
// operator "" X ("n")
unsigned Length = Literal.getUDSuffixOffset();
QualType StrTy = Context.getConstantArrayType(
Context.adjustStringLiteralBaseType(Context.CharTy.withConst()),
llvm::APInt(32, Length + 1), nullptr, ArraySizeModifier::Normal, 0);
Expr *Lit =
StringLiteral::Create(Context, StringRef(TokSpelling.data(), Length),
StringLiteralKind::Ordinary,
/*Pascal*/ false, StrTy, &TokLoc, 1);
return BuildLiteralOperatorCall(R, OpNameInfo, Lit, TokLoc);
}
case LOLR_Template: {
// C++11 [lex.ext]p3, p4: Otherwise (S contains a literal operator
// template), L is treated as a call of the form
// operator "" X <'c1', 'c2', ... 'ck'>()
// where n is the source character sequence c1 c2 ... ck.
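// For example, a hypothetical raw-template suffix _bin turns 101_bin into
// the call operator""_bin<'1', '0', '1'>().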
TemplateArgumentListInfo ExplicitArgs;
unsigned CharBits = Context.getIntWidth(Context.CharTy);
bool CharIsUnsigned = Context.CharTy->isUnsignedIntegerType();
llvm::APSInt Value(CharBits, CharIsUnsigned);
for (unsigned I = 0, N = Literal.getUDSuffixOffset(); I != N; ++I) {
Value = TokSpelling[I];
TemplateArgument Arg(Context, Value, Context.CharTy);
TemplateArgumentLocInfo ArgInfo;
ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo));
}
return BuildLiteralOperatorCall(R, OpNameInfo, std::nullopt, TokLoc,
&ExplicitArgs);
}
case LOLR_StringTemplatePack:
llvm_unreachable("unexpected literal operator lookup result");
}
}
Expr *Res;
if (Literal.isFixedPointLiteral()) {
QualType Ty;
if (Literal.isAccum) {
if (Literal.isHalf) {
Ty = Context.ShortAccumTy;
} else if (Literal.isLong) {
Ty = Context.LongAccumTy;
} else {
Ty = Context.AccumTy;
}
} else if (Literal.isFract) {
if (Literal.isHalf) {
Ty = Context.ShortFractTy;
} else if (Literal.isLong) {
Ty = Context.LongFractTy;
} else {
Ty = Context.FractTy;
}
}
if (Literal.isUnsigned) Ty = Context.getCorrespondingUnsignedType(Ty);
bool isSigned = !Literal.isUnsigned;
unsigned scale = Context.getFixedPointScale(Ty);
unsigned bit_width = Context.getTypeInfo(Ty).Width;
llvm::APInt Val(bit_width, 0, isSigned);
bool Overflowed = Literal.GetFixedPointValue(Val, scale);
bool ValIsZero = Val.isZero() && !Overflowed;
auto MaxVal = Context.getFixedPointMax(Ty).getValue();
if (Literal.isFract && Val == MaxVal + 1 && !ValIsZero)
// Clause 6.4.4 - The value of a constant shall be in the range of
// representable values for its type, with the exception of constants of a
// fract type with a value of exactly 1; such a constant shall denote
// the maximal value for the type.
--Val;
else if (Val.ugt(MaxVal) || Overflowed)
Diag(Tok.getLocation(), diag::err_too_large_for_fixed_point);
Res = FixedPointLiteral::CreateFromRawInt(Context, Val, Ty,
Tok.getLocation(), scale);
} else if (Literal.isFloatingLiteral()) {
QualType Ty;
if (Literal.isHalf){
if (getLangOpts().HLSL ||
getOpenCLOptions().isAvailableOption("cl_khr_fp16", getLangOpts()))
Ty = Context.HalfTy;
else {
Diag(Tok.getLocation(), diag::err_half_const_requires_fp16);
return ExprError();
}
} else if (Literal.isFloat)
Ty = Context.FloatTy;
else if (Literal.isLong)
Ty = !getLangOpts().HLSL ? Context.LongDoubleTy : Context.DoubleTy;
else if (Literal.isFloat16)
Ty = Context.Float16Ty;
else if (Literal.isFloat128)
Ty = Context.Float128Ty;
else if (getLangOpts().HLSL)
Ty = Context.FloatTy;
else
Ty = Context.DoubleTy;
Res = BuildFloatingLiteral(*this, Literal, Ty, Tok.getLocation());
if (Ty == Context.DoubleTy) {
if (getLangOpts().SinglePrecisionConstants) {
if (Ty->castAs<BuiltinType>()->getKind() != BuiltinType::Float) {
Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get();
}
} else if (getLangOpts().OpenCL && !getOpenCLOptions().isAvailableOption(
"cl_khr_fp64", getLangOpts())) {
// Impose single-precision float type when cl_khr_fp64 is not enabled.
Diag(Tok.getLocation(), diag::warn_double_const_requires_fp64)
<< (getLangOpts().getOpenCLCompatibleVersion() >= 300);
Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get();
}
}
} else if (!Literal.isIntegerLiteral()) {
return ExprError();
} else {
QualType Ty;
// 'z/uz' literals are a C++23 feature.
if (Literal.isSizeT)
Diag(Tok.getLocation(), getLangOpts().CPlusPlus
? getLangOpts().CPlusPlus23
? diag::warn_cxx20_compat_size_t_suffix
: diag::ext_cxx23_size_t_suffix
: diag::err_cxx23_size_t_suffix);
// 'wb/uwb' literals are a C23 feature. We support _BitInt as a type in C++,
// but we do not currently support the suffix in C++ mode because it's not
// entirely clear whether WG21 will prefer this suffix to return a library
// type such as std::bit_int instead of returning a _BitInt. '__wb/__uwb'
// literals are a C++ extension.
if (Literal.isBitInt)
PP.Diag(Tok.getLocation(),
getLangOpts().CPlusPlus ? diag::ext_cxx_bitint_suffix
: getLangOpts().C23 ? diag::warn_c23_compat_bitint_suffix
: diag::ext_c23_bitint_suffix);
// Get the value in the widest-possible width. What is "widest" depends on
// whether the literal is a bit-precise integer or not. For a bit-precise
// integer type, try to scan the source to determine how many bits are
// needed to represent the value. This may seem a bit expensive, but trying
// to get the integer value from an overly-wide APInt is *extremely*
// expensive, so the naive approach of assuming
// llvm::IntegerType::MAX_INT_BITS is a big performance hit.
unsigned BitsNeeded =
Literal.isBitInt ? llvm::APInt::getSufficientBitsNeeded(
Literal.getLiteralDigits(), Literal.getRadix())
: Context.getTargetInfo().getIntMaxTWidth();
llvm::APInt ResultVal(BitsNeeded, 0);
if (Literal.GetIntegerValue(ResultVal)) {
// If this value didn't fit into uintmax_t, error and force to ull.
Diag(Tok.getLocation(), diag::err_integer_literal_too_large)
<< /* Unsigned */ 1;
Ty = Context.UnsignedLongLongTy;
assert(Context.getTypeSize(Ty) == ResultVal.getBitWidth() &&
"long long is not intmax_t?");
} else {
// If this value fits into a ULL, try to figure out what else it fits into
// according to the rules of C99 6.4.4.1p5.
// Octal, Hexadecimal, and integers with a U suffix are allowed to
// be an unsigned int.
bool AllowUnsigned = Literal.isUnsigned || Literal.getRadix() != 10;
// HLSL doesn't really have `long` or `long long`. We support the `ll`
// suffix for portability of code with C++, but both `l` and `ll` are
// 64-bit integer types, and we want the type of `1l` and `1ll` to be the
// same.
if (getLangOpts().HLSL && !Literal.isLong && Literal.isLongLong) {
Literal.isLong = true;
Literal.isLongLong = false;
}
// Check from smallest to largest, picking the smallest type we can.
unsigned Width = 0;
// Microsoft specific integer suffixes are explicitly sized.
if (Literal.MicrosoftInteger) {
if (Literal.MicrosoftInteger == 8 && !Literal.isUnsigned) {
Width = 8;
Ty = Context.CharTy;
} else {
Width = Literal.MicrosoftInteger;
Ty = Context.getIntTypeForBitwidth(Width,
/*Signed=*/!Literal.isUnsigned);
}
}
// Bit-precise integer literals are automagically-sized based on the
// width required by the literal.
if (Literal.isBitInt) {
// The signed version has one more bit for the sign value. There are no
// zero-width bit-precise integers, even if the literal value is 0.
Width = std::max(ResultVal.getActiveBits(), 1u) +
(Literal.isUnsigned ? 0u : 1u);
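// For example, '1wb' needs one value bit plus the sign bit and gets type
// _BitInt(2), while '1uwb' gets type unsigned _BitInt(1).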
// Diagnose if the width of the constant is larger than BITINT_MAXWIDTH,
// and reset the type to the largest supported width.
unsigned int MaxBitIntWidth =
Context.getTargetInfo().getMaxBitIntWidth();
if (Width > MaxBitIntWidth) {
Diag(Tok.getLocation(), diag::err_integer_literal_too_large)
<< Literal.isUnsigned;
Width = MaxBitIntWidth;
}
// Reset the result value to the smaller APInt and select the correct
// type to be used. Note, we zext even for signed values because the
// literal itself is always an unsigned value (a preceding - is a
// unary operator, not part of the literal).
ResultVal = ResultVal.zextOrTrunc(Width);
Ty = Context.getBitIntType(Literal.isUnsigned, Width);
}
// Check C++23 size_t literals.
if (Literal.isSizeT) {
assert(!Literal.MicrosoftInteger &&
"size_t literals can't be Microsoft literals");
unsigned SizeTSize = Context.getTargetInfo().getTypeWidth(
Context.getTargetInfo().getSizeType());
// Does it fit in size_t?
if (ResultVal.isIntN(SizeTSize)) {
// Does it fit in ssize_t?
if (!Literal.isUnsigned && ResultVal[SizeTSize - 1] == 0)
Ty = Context.getSignedSizeType();
else if (AllowUnsigned)
Ty = Context.getSizeType();
Width = SizeTSize;
}
}
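// For example, in C++23 '1uz' has type size_t, while '1z' has the
// corresponding signed type chosen just above.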
if (Ty.isNull() && !Literal.isLong && !Literal.isLongLong &&
!Literal.isSizeT) {
// Are int/unsigned possibilities?
unsigned IntSize = Context.getTargetInfo().getIntWidth();
// Does it fit in an unsigned int?
if (ResultVal.isIntN(IntSize)) {
// Does it fit in a signed int?
if (!Literal.isUnsigned && ResultVal[IntSize-1] == 0)
Ty = Context.IntTy;
else if (AllowUnsigned)
Ty = Context.UnsignedIntTy;
Width = IntSize;
}
}
// Are long/unsigned long possibilities?
if (Ty.isNull() && !Literal.isLongLong && !Literal.isSizeT) {
unsigned LongSize = Context.getTargetInfo().getLongWidth();
// Does it fit in an unsigned long?
if (ResultVal.isIntN(LongSize)) {
// Does it fit in a signed long?
if (!Literal.isUnsigned && ResultVal[LongSize-1] == 0)
Ty = Context.LongTy;
else if (AllowUnsigned)
Ty = Context.UnsignedLongTy;
// Check according to the rules of C90 6.1.3.2p5. C++03 [lex.icon]p2
// is compatible.
else if (!getLangOpts().C99 && !getLangOpts().CPlusPlus11) {
const unsigned LongLongSize =
Context.getTargetInfo().getLongLongWidth();
Diag(Tok.getLocation(),
getLangOpts().CPlusPlus
? Literal.isLong
? diag::warn_old_implicitly_unsigned_long_cxx
: /*C++98 UB*/ diag::
ext_old_implicitly_unsigned_long_cxx
: diag::warn_old_implicitly_unsigned_long)
<< (LongLongSize > LongSize ? /*will have type 'long long'*/ 0
: /*will be ill-formed*/ 1);
Ty = Context.UnsignedLongTy;
}
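// For instance, with 32-bit long in C90, the decimal literal 4294967295
// lands here and gets type unsigned long; C99 would instead give it type
// long long, which is what the diagnostic reports.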
Width = LongSize;
}
}
// Check long long if needed.
if (Ty.isNull() && !Literal.isSizeT) {
unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth();
// Does it fit in an unsigned long long?
if (ResultVal.isIntN(LongLongSize)) {
// Does it fit in a signed long long?
// To be compatible with MSVC, hex integer literals ending with the
// LL or i64 suffix are always signed in Microsoft mode.
if (!Literal.isUnsigned && (ResultVal[LongLongSize-1] == 0 ||
(getLangOpts().MSVCCompat && Literal.isLongLong)))
Ty = Context.LongLongTy;
else if (AllowUnsigned)
Ty = Context.UnsignedLongLongTy;
Width = LongLongSize;
// 'long long' is a C99 or C++11 feature, whether the literal
// explicitly specified 'long long' or we needed the extra width.
if (getLangOpts().CPlusPlus)
Diag(Tok.getLocation(), getLangOpts().CPlusPlus11
? diag::warn_cxx98_compat_longlong
: diag::ext_cxx11_longlong);
else if (!getLangOpts().C99)
Diag(Tok.getLocation(), diag::ext_c99_longlong);
}
}
// If we still couldn't decide a type, we either have a 'size_t' literal
// that is out of range, or a decimal literal that does not fit in a
// signed long long and has no U suffix.
if (Ty.isNull()) {
if (Literal.isSizeT)
Diag(Tok.getLocation(), diag::err_size_t_literal_too_large)
<< Literal.isUnsigned;
else
Diag(Tok.getLocation(),
diag::ext_integer_literal_too_large_for_signed);
Ty = Context.UnsignedLongLongTy;
Width = Context.getTargetInfo().getLongLongWidth();
}
if (ResultVal.getBitWidth() != Width)
ResultVal = ResultVal.trunc(Width);
}
Res = IntegerLiteral::Create(Context, ResultVal, Ty, Tok.getLocation());
}
// If this is an imaginary literal, create the ImaginaryLiteral wrapper.
if (Literal.isImaginary) {
Res = new (Context) ImaginaryLiteral(Res,
Context.getComplexType(Res->getType()));
Diag(Tok.getLocation(), diag::ext_imaginary_constant);
}
return Res;
}
ExprResult Sema::ActOnParenExpr(SourceLocation L, SourceLocation R, Expr *E) {
assert(E && "ActOnParenExpr() missing expr");
QualType ExprTy = E->getType();
if (getLangOpts().ProtectParens && CurFPFeatures.getAllowFPReassociate() &&
!E->isLValue() && ExprTy->hasFloatingRepresentation())
return BuildBuiltinCallExpr(R, Builtin::BI__arithmetic_fence, E);
return new (Context) ParenExpr(L, R, E);
}
static bool CheckVecStepTraitOperandType(Sema &S, QualType T,
SourceLocation Loc,
SourceRange ArgRange) {
// [OpenCL 1.1 6.11.12] "The vec_step built-in function takes a built-in
// scalar or vector data type argument..."
// Every built-in scalar type (OpenCL 1.1 6.1.1) is either an arithmetic
// type (C99 6.2.5p18) or void.
if (!(T->isArithmeticType() || T->isVoidType() || T->isVectorType())) {
S.Diag(Loc, diag::err_vecstep_non_scalar_vector_type)
<< T << ArgRange;
return true;
}
assert((T->isVoidType() || !T->isIncompleteType()) &&
"Scalar types should always be complete");
return false;
}
static bool CheckVectorElementsTraitOperandType(Sema &S, QualType T,
SourceLocation Loc,
SourceRange ArgRange) {
// builtin_vectorelements supports both fixed-sized and scalable vectors.
if (!T->isVectorType() && !T->isSizelessVectorType())
return S.Diag(Loc, diag::err_builtin_non_vector_type)
<< ""
<< "__builtin_vectorelements" << T << ArgRange;
return false;
}
static bool checkPtrAuthTypeDiscriminatorOperandType(Sema &S, QualType T,
SourceLocation Loc,
SourceRange ArgRange) {
if (S.checkPointerAuthEnabled(Loc, ArgRange))
return true;
if (!T->isFunctionType() && !T->isFunctionPointerType() &&
!T->isFunctionReferenceType() && !T->isMemberFunctionPointerType()) {
S.Diag(Loc, diag::err_ptrauth_type_disc_undiscriminated) << T << ArgRange;
return true;
}
return false;
}
static bool CheckExtensionTraitOperandType(Sema &S, QualType T,
SourceLocation Loc,
SourceRange ArgRange,
UnaryExprOrTypeTrait TraitKind) {
// Invalid types must be hard errors for SFINAE in C++.
if (S.LangOpts.CPlusPlus)
return true;
// C99 6.5.3.4p1:
if (T->isFunctionType() &&
(TraitKind == UETT_SizeOf || TraitKind == UETT_AlignOf ||
TraitKind == UETT_PreferredAlignOf)) {
// sizeof(function)/alignof(function) is allowed as an extension.
S.Diag(Loc, diag::ext_sizeof_alignof_function_type)
<< getTraitSpelling(TraitKind) << ArgRange;
return false;
}
// Allow sizeof(void)/alignof(void) as an extension, unless in OpenCL where
// this is an error (OpenCL v1.1 s6.3.k)
if (T->isVoidType()) {
unsigned DiagID = S.LangOpts.OpenCL ? diag::err_opencl_sizeof_alignof_type
: diag::ext_sizeof_alignof_void_type;
S.Diag(Loc, DiagID) << getTraitSpelling(TraitKind) << ArgRange;
return false;
}
return true;
}
static bool CheckObjCTraitOperandConstraints(Sema &S, QualType T,
SourceLocation Loc,
SourceRange ArgRange,
UnaryExprOrTypeTrait TraitKind) {
// Reject sizeof(interface) and sizeof(interface<proto>) if the
// runtime doesn't allow it.
if (!S.LangOpts.ObjCRuntime.allowsSizeofAlignof() && T->isObjCObjectType()) {
S.Diag(Loc, diag::err_sizeof_nonfragile_interface)
<< T << (TraitKind == UETT_SizeOf)
<< ArgRange;
return true;
}
return false;
}
/// Check whether E is a pointer from a decayed array type (the decayed
/// pointer type is equal to T) and emit a warning if it is.
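///
/// For example, in 'sizeof(arr + 4)' the array decays to a pointer, so the
/// result is the pointer size rather than the array size; this warning
/// flags that decay.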
static void warnOnSizeofOnArrayDecay(Sema &S, SourceLocation Loc, QualType T,
const Expr *E) {
// Don't warn if the operation changed the type.
if (T != E->getType())
return;
// Now look for array decays.
const auto *ICE = dyn_cast<ImplicitCastExpr>(E);
if (!ICE || ICE->getCastKind() != CK_ArrayToPointerDecay)
return;
S.Diag(Loc, diag::warn_sizeof_array_decay) << ICE->getSourceRange()
<< ICE->getType()
<< ICE->getSubExpr()->getType();
}
bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E,
UnaryExprOrTypeTrait ExprKind) {
QualType ExprTy = E->getType();
assert(!ExprTy->isReferenceType());
bool IsUnevaluatedOperand =
(ExprKind == UETT_SizeOf || ExprKind == UETT_DataSizeOf ||
ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf ||
ExprKind == UETT_VecStep);
if (IsUnevaluatedOperand) {
ExprResult Result = CheckUnevaluatedOperand(E);
if (Result.isInvalid())
return true;
E = Result.get();
}
// The operand for sizeof and alignof is in an unevaluated expression context,
// so side effects could result in unintended consequences.
// Exclude instantiation-dependent expressions, because 'sizeof' is sometimes
// used to build SFINAE gadgets.
// FIXME: Should we consider instantiation-dependent operands to 'alignof'?
if (IsUnevaluatedOperand && !inTemplateInstantiation() &&
!E->isInstantiationDependent() &&
!E->getType()->isVariableArrayType() &&
E->HasSideEffects(Context, false))
Diag(E->getExprLoc(), diag::warn_side_effects_unevaluated_context);
if (ExprKind == UETT_VecStep)
return CheckVecStepTraitOperandType(*this, ExprTy, E->getExprLoc(),
E->getSourceRange());
if (ExprKind == UETT_VectorElements)
return CheckVectorElementsTraitOperandType(*this, ExprTy, E->getExprLoc(),
E->getSourceRange());
// Explicitly list some types as extensions.
if (!CheckExtensionTraitOperandType(*this, ExprTy, E->getExprLoc(),
E->getSourceRange(), ExprKind))
return false;
// WebAssembly tables are always illegal operands to unary expressions and
// type traits.
if (Context.getTargetInfo().getTriple().isWasm() &&
E->getType()->isWebAssemblyTableType()) {
Diag(E->getExprLoc(), diag::err_wasm_table_invalid_uett_operand)
<< getTraitSpelling(ExprKind);
return true;
}
// 'alignof' applied to an expression only requires the base element type of
// the expression to be complete. 'sizeof' requires the expression's type to
// be complete (and will attempt to complete it if it's an array of unknown
// bound).
if (ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf) {
if (RequireCompleteSizedType(
E->getExprLoc(), Context.getBaseElementType(E->getType()),
diag::err_sizeof_alignof_incomplete_or_sizeless_type,
getTraitSpelling(ExprKind), E->getSourceRange()))
return true;
} else {
if (RequireCompleteSizedExprType(
E, diag::err_sizeof_alignof_incomplete_or_sizeless_type,
getTraitSpelling(ExprKind), E->getSourceRange()))
return true;
}
// Completing the expression's type may have changed it.
ExprTy = E->getType();
assert(!ExprTy->isReferenceType());
if (ExprTy->isFunctionType()) {
Diag(E->getExprLoc(), diag::err_sizeof_alignof_function_type)
<< getTraitSpelling(ExprKind) << E->getSourceRange();
return true;
}
if (CheckObjCTraitOperandConstraints(*this, ExprTy, E->getExprLoc(),
E->getSourceRange(), ExprKind))
return true;
if (ExprKind == UETT_SizeOf) {
if (const auto *DeclRef = dyn_cast<DeclRefExpr>(E->IgnoreParens())) {
if (const auto *PVD = dyn_cast<ParmVarDecl>(DeclRef->getFoundDecl())) {
QualType OType = PVD->getOriginalType();
QualType Type = PVD->getType();
if (Type->isPointerType() && OType->isArrayType()) {
Diag(E->getExprLoc(), diag::warn_sizeof_array_param)
<< Type << OType;
Diag(PVD->getLocation(), diag::note_declared_at);
}
}
}
// Warn on "sizeof(array op x)" and "sizeof(x op array)", where the array
// decays into a pointer and returns an unintended result. This is most
// likely a typo for "sizeof(array) op x".
if (const auto *BO = dyn_cast<BinaryOperator>(E->IgnoreParens())) {
warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(),
BO->getLHS());
warnOnSizeofOnArrayDecay(*this, BO->getOperatorLoc(), BO->getType(),
BO->getRHS());
}
}
return false;
}
static bool CheckAlignOfExpr(Sema &S, Expr *E, UnaryExprOrTypeTrait ExprKind) {
// Cannot know anything else if the expression is dependent.
if (E->isTypeDependent())
return false;
if (E->getObjectKind() == OK_BitField) {
S.Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield)
<< 1 << E->getSourceRange();
return true;
}
ValueDecl *D = nullptr;
Expr *Inner = E->IgnoreParens();
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Inner)) {
D = DRE->getDecl();
} else if (MemberExpr *ME = dyn_cast<MemberExpr>(Inner)) {
D = ME->getMemberDecl();
}
// If it's a field, require the containing struct to have a
// complete definition so that we can compute the layout.
//
// This can happen in C++11 onwards, either by naming the member
// in a way that is not transformed into a member access expression
// (in an unevaluated operand, for instance), or by naming the member
// in a trailing-return-type.
//
// For the record, since __alignof__ on expressions is a GCC
// extension, GCC seems to permit this but always gives the
// nonsensical answer 0.
//
// We don't really need the layout here --- we could instead just
// directly check for all the appropriate alignment-lowering
// attributes --- but that would require duplicating a lot of
// logic that just isn't worth duplicating for such a marginal
// use-case.
if (FieldDecl *FD = dyn_cast_or_null<FieldDecl>(D)) {
// Fast path this check, since we at least know the record has a
// definition if we can find a member of it.
if (!FD->getParent()->isCompleteDefinition()) {
S.Diag(E->getExprLoc(), diag::err_alignof_member_of_incomplete_type)
<< E->getSourceRange();
return true;
}
// Otherwise, if it's a field, and the field doesn't have
// reference type, then it must have a complete type (or be a
// flexible array member, which we explicitly want to
// white-list anyway), which makes the following checks trivial.
if (!FD->getType()->isReferenceType())
return false;
}
return S.CheckUnaryExprOrTypeTraitOperand(E, ExprKind);
}
bool Sema::CheckVecStepExpr(Expr *E) {
E = E->IgnoreParens();
// Cannot know anything else if the expression is dependent.
if (E->isTypeDependent())
return false;
return CheckUnaryExprOrTypeTraitOperand(E, UETT_VecStep);
}
static void captureVariablyModifiedType(ASTContext &Context, QualType T,
CapturingScopeInfo *CSI) {
assert(T->isVariablyModifiedType());
assert(CSI != nullptr);
// We're going to walk down into the type and look for VLA expressions.
do {
const Type *Ty = T.getTypePtr();
switch (Ty->getTypeClass()) {
#define TYPE(Class, Base)
#define ABSTRACT_TYPE(Class, Base)
#define NON_CANONICAL_TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base)
#include "clang/AST/TypeNodes.inc"
T = QualType();
break;
// These types are never variably-modified.
case Type::Builtin:
case Type::Complex:
case Type::Vector:
case Type::ExtVector:
case Type::ConstantMatrix:
case Type::Record:
case Type::Enum:
case Type::TemplateSpecialization:
case Type::ObjCObject:
case Type::ObjCInterface:
case Type::ObjCObjectPointer:
case Type::ObjCTypeParam:
case Type::Pipe:
case Type::BitInt:
llvm_unreachable("type class is never variably-modified!");
case Type::Elaborated:
T = cast<ElaboratedType>(Ty)->getNamedType();
break;
case Type::Adjusted:
T = cast<AdjustedType>(Ty)->getOriginalType();
break;
case Type::Decayed:
T = cast<DecayedType>(Ty)->getPointeeType();
break;
case Type::ArrayParameter:
T = cast<ArrayParameterType>(Ty)->getElementType();
break;
case Type::Pointer:
T = cast<PointerType>(Ty)->getPointeeType();
break;
case Type::BlockPointer:
T = cast<BlockPointerType>(Ty)->getPointeeType();
break;
case Type::LValueReference:
case Type::RValueReference:
T = cast<ReferenceType>(Ty)->getPointeeType();
break;
case Type::MemberPointer:
T = cast<MemberPointerType>(Ty)->getPointeeType();
break;
case Type::ConstantArray:
case Type::IncompleteArray:
// Losing element qualification here is fine.
T = cast<ArrayType>(Ty)->getElementType();
break;
case Type::VariableArray: {
// Losing element qualification here is fine.
const VariableArrayType *VAT = cast<VariableArrayType>(Ty);
// Unknown size indication requires no size computation.
// Otherwise, evaluate and record it.
auto Size = VAT->getSizeExpr();
if (Size && !CSI->isVLATypeCaptured(VAT) &&
(isa<CapturedRegionScopeInfo>(CSI) || isa<LambdaScopeInfo>(CSI)))
CSI->addVLATypeCapture(Size->getExprLoc(), VAT, Context.getSizeType());
T = VAT->getElementType();
break;
}
case Type::FunctionProto:
case Type::FunctionNoProto:
T = cast<FunctionType>(Ty)->getReturnType();
break;
case Type::Paren:
case Type::TypeOf:
case Type::UnaryTransform:
case Type::Attributed:
case Type::BTFTagAttributed:
case Type::SubstTemplateTypeParm:
case Type::MacroQualified:
case Type::CountAttributed:
// Keep walking after single level desugaring.
T = T.getSingleStepDesugaredType(Context);
break;
case Type::Typedef:
T = cast<TypedefType>(Ty)->desugar();
break;
case Type::Decltype:
T = cast<DecltypeType>(Ty)->desugar();
break;
case Type::PackIndexing:
T = cast<PackIndexingType>(Ty)->desugar();
break;
case Type::Using:
T = cast<UsingType>(Ty)->desugar();
break;
case Type::Auto:
case Type::DeducedTemplateSpecialization:
T = cast<DeducedType>(Ty)->getDeducedType();
break;
case Type::TypeOfExpr:
T = cast<TypeOfExprType>(Ty)->getUnderlyingExpr()->getType();
break;
case Type::Atomic:
T = cast<AtomicType>(Ty)->getValueType();
break;
}
} while (!T.isNull() && T->isVariablyModifiedType());
}
bool Sema::CheckUnaryExprOrTypeTraitOperand(QualType ExprType,
SourceLocation OpLoc,
SourceRange ExprRange,
UnaryExprOrTypeTrait ExprKind,
StringRef KWName) {
if (ExprType->isDependentType())
return false;
// C++ [expr.sizeof]p2:
// When applied to a reference or a reference type, the result
// is the size of the referenced type.
// C++11 [expr.alignof]p3:
// When alignof is applied to a reference type, the result
// shall be the alignment of the referenced type.
if (const ReferenceType *Ref = ExprType->getAs<ReferenceType>())
ExprType = Ref->getPointeeType();
// C11 6.5.3.4/3, C++11 [expr.alignof]p3:
// When alignof or _Alignof is applied to an array type, the result
// is the alignment of the element type.
if (ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf ||
ExprKind == UETT_OpenMPRequiredSimdAlign) {
// If the trait is 'alignof' in C before C2y, the ability to apply the
// trait to an incomplete array is an extension.
if (ExprKind == UETT_AlignOf && !getLangOpts().CPlusPlus &&
ExprType->isIncompleteArrayType())
Diag(OpLoc, getLangOpts().C2y
? diag::warn_c2y_compat_alignof_incomplete_array
: diag::ext_c2y_alignof_incomplete_array);
ExprType = Context.getBaseElementType(ExprType);
}
if (ExprKind == UETT_VecStep)
return CheckVecStepTraitOperandType(*this, ExprType, OpLoc, ExprRange);
if (ExprKind == UETT_VectorElements)
return CheckVectorElementsTraitOperandType(*this, ExprType, OpLoc,
ExprRange);
if (ExprKind == UETT_PtrAuthTypeDiscriminator)
return checkPtrAuthTypeDiscriminatorOperandType(*this, ExprType, OpLoc,
ExprRange);
// Explicitly list some types as extensions.
if (!CheckExtensionTraitOperandType(*this, ExprType, OpLoc, ExprRange,
ExprKind))
return false;
if (RequireCompleteSizedType(
OpLoc, ExprType, diag::err_sizeof_alignof_incomplete_or_sizeless_type,
KWName, ExprRange))
return true;
if (ExprType->isFunctionType()) {
Diag(OpLoc, diag::err_sizeof_alignof_function_type) << KWName << ExprRange;
return true;
}
// WebAssembly tables are always illegal operands to unary expressions and
// type traits.
if (Context.getTargetInfo().getTriple().isWasm() &&
ExprType->isWebAssemblyTableType()) {
Diag(OpLoc, diag::err_wasm_table_invalid_uett_operand)
<< getTraitSpelling(ExprKind);
return true;
}
if (CheckObjCTraitOperandConstraints(*this, ExprType, OpLoc, ExprRange,
ExprKind))
return true;
if (ExprType->isVariablyModifiedType() && FunctionScopes.size() > 1) {
if (auto *TT = ExprType->getAs<TypedefType>()) {
for (auto I = FunctionScopes.rbegin(),
E = std::prev(FunctionScopes.rend());
I != E; ++I) {
auto *CSI = dyn_cast<CapturingScopeInfo>(*I);
if (CSI == nullptr)
break;
DeclContext *DC = nullptr;
if (auto *LSI = dyn_cast<LambdaScopeInfo>(CSI))
DC = LSI->CallOperator;
else if (auto *CRSI = dyn_cast<CapturedRegionScopeInfo>(CSI))
DC = CRSI->TheCapturedDecl;
else if (auto *BSI = dyn_cast<BlockScopeInfo>(CSI))
DC = BSI->TheDecl;
if (DC) {
if (DC->containsDecl(TT->getDecl()))
break;
captureVariablyModifiedType(Context, ExprType, CSI);
}
}
}
}
return false;
}
ExprResult Sema::CreateUnaryExprOrTypeTraitExpr(TypeSourceInfo *TInfo,
SourceLocation OpLoc,
UnaryExprOrTypeTrait ExprKind,
SourceRange R) {
if (!TInfo)
return ExprError();
QualType T = TInfo->getType();
if (!T->isDependentType() &&
CheckUnaryExprOrTypeTraitOperand(T, OpLoc, R, ExprKind,
getTraitSpelling(ExprKind)))
return ExprError();
// Use the TypeSourceInfo overload of TransformToPotentiallyEvaluated to
// properly deal with VLAs in nested calls of sizeof and typeof.
if (isUnevaluatedContext() && ExprKind == UETT_SizeOf &&
TInfo->getType()->isVariablyModifiedType())
TInfo = TransformToPotentiallyEvaluated(TInfo);
// C99 6.5.3.4p4: the type (an unsigned integer type) is size_t.
return new (Context) UnaryExprOrTypeTraitExpr(
ExprKind, TInfo, Context.getSizeType(), OpLoc, R.getEnd());
}
ExprResult
Sema::CreateUnaryExprOrTypeTraitExpr(Expr *E, SourceLocation OpLoc,
UnaryExprOrTypeTrait ExprKind) {
ExprResult PE = CheckPlaceholderExpr(E);
if (PE.isInvalid())
return ExprError();
E = PE.get();
// Verify that the operand is valid.
bool isInvalid = false;
if (E->isTypeDependent()) {
// Delay type-checking for type-dependent expressions.
} else if (ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf) {
isInvalid = CheckAlignOfExpr(*this, E, ExprKind);
} else if (ExprKind == UETT_VecStep) {
isInvalid = CheckVecStepExpr(E);
} else if (ExprKind == UETT_OpenMPRequiredSimdAlign) {
Diag(E->getExprLoc(), diag::err_openmp_default_simd_align_expr);
isInvalid = true;
} else if (E->refersToBitField()) { // C99 6.5.3.4p1.
Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 0;
isInvalid = true;
} else if (ExprKind == UETT_VectorElements) {
isInvalid = CheckUnaryExprOrTypeTraitOperand(E, UETT_VectorElements);
} else {
isInvalid = CheckUnaryExprOrTypeTraitOperand(E, UETT_SizeOf);
}
if (isInvalid)
return ExprError();
if (ExprKind == UETT_SizeOf && E->getType()->isVariableArrayType()) {
PE = TransformToPotentiallyEvaluated(E);
if (PE.isInvalid()) return ExprError();
E = PE.get();
}
// C99 6.5.3.4p4: the type (an unsigned integer type) is size_t.
return new (Context) UnaryExprOrTypeTraitExpr(
ExprKind, E, Context.getSizeType(), OpLoc, E->getSourceRange().getEnd());
}
ExprResult
Sema::ActOnUnaryExprOrTypeTraitExpr(SourceLocation OpLoc,
UnaryExprOrTypeTrait ExprKind, bool IsType,
void *TyOrEx, SourceRange ArgRange) {
// If error parsing type, ignore.
if (!TyOrEx) return ExprError();
if (IsType) {
TypeSourceInfo *TInfo;
(void) GetTypeFromParser(ParsedType::getFromOpaquePtr(TyOrEx), &TInfo);
return CreateUnaryExprOrTypeTraitExpr(TInfo, OpLoc, ExprKind, ArgRange);
}
Expr *ArgEx = (Expr *)TyOrEx;
ExprResult Result = CreateUnaryExprOrTypeTraitExpr(ArgEx, OpLoc, ExprKind);
return Result;
}
bool Sema::CheckAlignasTypeArgument(StringRef KWName, TypeSourceInfo *TInfo,
SourceLocation OpLoc, SourceRange R) {
if (!TInfo)
return true;
return CheckUnaryExprOrTypeTraitOperand(TInfo->getType(), OpLoc, R,
UETT_AlignOf, KWName);
}
bool Sema::ActOnAlignasTypeArgument(StringRef KWName, ParsedType Ty,
SourceLocation OpLoc, SourceRange R) {
TypeSourceInfo *TInfo;
(void)GetTypeFromParser(ParsedType::getFromOpaquePtr(Ty.getAsOpaquePtr()),
&TInfo);
return CheckAlignasTypeArgument(KWName, TInfo, OpLoc, R);
}
static QualType CheckRealImagOperand(Sema &S, ExprResult &V, SourceLocation Loc,
bool IsReal) {
if (V.get()->isTypeDependent())
return S.Context.DependentTy;
// _Real and _Imag are only l-values for normal l-values.
if (V.get()->getObjectKind() != OK_Ordinary) {
V = S.DefaultLvalueConversion(V.get());
if (V.isInvalid())
return QualType();
}
// These operators return the element type of a complex type.
if (const ComplexType *CT = V.get()->getType()->getAs<ComplexType>())
return CT->getElementType();
// Otherwise they pass through real integer and floating point types here.
if (V.get()->getType()->isArithmeticType())
return V.get()->getType();
// Test for placeholders.
ExprResult PR = S.CheckPlaceholderExpr(V.get());
if (PR.isInvalid()) return QualType();
if (PR.get() != V.get()) {
V = PR;
return CheckRealImagOperand(S, V, Loc, IsReal);
}
// Reject anything else.
S.Diag(Loc, diag::err_realimag_invalid_type) << V.get()->getType()
<< (IsReal ? "__real" : "__imag");
return QualType();
}
ExprResult
Sema::ActOnPostfixUnaryOp(Scope *S, SourceLocation OpLoc,
tok::TokenKind Kind, Expr *Input) {
UnaryOperatorKind Opc;
switch (Kind) {
default: llvm_unreachable("Unknown unary op!");
case tok::plusplus: Opc = UO_PostInc; break;
case tok::minusminus: Opc = UO_PostDec; break;
}
// Since this might be a postfix expression, get rid of ParenListExprs.
ExprResult Result = MaybeConvertParenListExprToParenExpr(S, Input);
if (Result.isInvalid()) return ExprError();
Input = Result.get();
return BuildUnaryOp(S, OpLoc, Opc, Input);
}
/// Diagnose if arithmetic on the given ObjC pointer is illegal.
///
/// \return true on error
static bool checkArithmeticOnObjCPointer(Sema &S,
SourceLocation opLoc,
Expr *op) {
assert(op->getType()->isObjCObjectPointerType());
if (S.LangOpts.ObjCRuntime.allowsPointerArithmetic() &&
!S.LangOpts.ObjCSubscriptingLegacyRuntime)
return false;
S.Diag(opLoc, diag::err_arithmetic_nonfragile_interface)
<< op->getType()->castAs<ObjCObjectPointerType>()->getPointeeType()
<< op->getSourceRange();
return true;
}
static bool isMSPropertySubscriptExpr(Sema &S, Expr *Base) {
auto *BaseNoParens = Base->IgnoreParens();
if (auto *MSProp = dyn_cast<MSPropertyRefExpr>(BaseNoParens))
return MSProp->getPropertyDecl()->getType()->isArrayType();
return isa<MSPropertySubscriptExpr>(BaseNoParens);
}
// Returns the type used for LHS[RHS], given one of LHS, RHS is type-dependent.
// Typically this is DependentTy, but can sometimes be more precise.
//
// There are cases when we could determine a non-dependent type:
// - LHS and RHS may have non-dependent types despite being type-dependent
// (e.g. unbounded array static members of the current instantiation)
// - one may be a dependent-sized array with known element type
// - one may be a dependent-typed valid index (enum in current instantiation)
//
// We *always* return a dependent type, in such cases it is DependentTy.
// This avoids creating type-dependent expressions with non-dependent types.
// FIXME: is this important to avoid? See https://reviews.llvm.org/D107275
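// For example (a sketch of the first case above): if 'S<T>::arr' is a static
// member of the current instantiation declared as 'static int arr[];',
// subscripting it can yield the known element type 'int' even though the
// expression stays type-dependent.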
static QualType getDependentArraySubscriptType(Expr *LHS, Expr *RHS,
const ASTContext &Ctx) {
assert(LHS->isTypeDependent() || RHS->isTypeDependent());
QualType LTy = LHS->getType(), RTy = RHS->getType();
QualType Result = Ctx.DependentTy;
if (RTy->isIntegralOrUnscopedEnumerationType()) {
if (const PointerType *PT = LTy->getAs<PointerType>())
Result = PT->getPointeeType();
else if (const ArrayType *AT = LTy->getAsArrayTypeUnsafe())
Result = AT->getElementType();
} else if (LTy->isIntegralOrUnscopedEnumerationType()) {
if (const PointerType *PT = RTy->getAs<PointerType>())
Result = PT->getPointeeType();
else if (const ArrayType *AT = RTy->getAsArrayTypeUnsafe())
Result = AT->getElementType();
}
// Ensure we return a dependent type.
return Result->isDependentType() ? Result : Ctx.DependentTy;
}
ExprResult Sema::ActOnArraySubscriptExpr(Scope *S, Expr *base,
SourceLocation lbLoc,
MultiExprArg ArgExprs,
SourceLocation rbLoc) {
if (base && !base->getType().isNull() &&
base->hasPlaceholderType(BuiltinType::ArraySection)) {
auto *AS = cast<ArraySectionExpr>(base);
if (AS->isOMPArraySection())
return OpenMP().ActOnOMPArraySectionExpr(
base, lbLoc, ArgExprs.front(), SourceLocation(), SourceLocation(),
/*Length*/ nullptr,
/*Stride=*/nullptr, rbLoc);
return OpenACC().ActOnArraySectionExpr(base, lbLoc, ArgExprs.front(),
SourceLocation(), /*Length*/ nullptr,
rbLoc);
}
// Since this might be a postfix expression, get rid of ParenListExprs.
if (isa<ParenListExpr>(base)) {
ExprResult result = MaybeConvertParenListExprToParenExpr(S, base);
if (result.isInvalid())
return ExprError();
base = result.get();
}
// Check if base and idx form a MatrixSubscriptExpr.
//
// Helper to check for comma expressions, which are not allowed as indices for
// matrix subscript expressions.
auto CheckAndReportCommaError = [this, base, rbLoc](Expr *E) {
if (isa<BinaryOperator>(E) && cast<BinaryOperator>(E)->isCommaOp()) {
Diag(E->getExprLoc(), diag::err_matrix_subscript_comma)
<< SourceRange(base->getBeginLoc(), rbLoc);
return true;
}
return false;
};
// The matrix subscript operator ([][]) is considered a single operator.
// Separating the index expressions by parentheses is not allowed.
if (base && !base->getType().isNull() &&
base->hasPlaceholderType(BuiltinType::IncompleteMatrixIdx) &&
!isa<MatrixSubscriptExpr>(base)) {
Diag(base->getExprLoc(), diag::err_matrix_separate_incomplete_index)
<< SourceRange(base->getBeginLoc(), rbLoc);
return ExprError();
}
// If the base is a MatrixSubscriptExpr, try to create a new
// MatrixSubscriptExpr.
auto *matSubscriptE = dyn_cast<MatrixSubscriptExpr>(base);
if (matSubscriptE) {
assert(ArgExprs.size() == 1);
if (CheckAndReportCommaError(ArgExprs.front()))
return ExprError();
assert(matSubscriptE->isIncomplete() &&
"base has to be an incomplete matrix subscript");
return CreateBuiltinMatrixSubscriptExpr(matSubscriptE->getBase(),
matSubscriptE->getRowIdx(),
ArgExprs.front(), rbLoc);
}
if (base->getType()->isWebAssemblyTableType()) {
Diag(base->getExprLoc(), diag::err_wasm_table_art)
<< SourceRange(base->getBeginLoc(), rbLoc) << 3;
return ExprError();
}
// Handle any non-overload placeholder types in the base and index
// expressions. We can't handle overloads here because the other
// operand might be an overloadable type, in which case the overload
// resolution for the operator overload should get the first crack
// at the overload.
bool IsMSPropertySubscript = false;
if (base->getType()->isNonOverloadPlaceholderType()) {
IsMSPropertySubscript = isMSPropertySubscriptExpr(*this, base);
if (!IsMSPropertySubscript) {
ExprResult result = CheckPlaceholderExpr(base);
if (result.isInvalid())
return ExprError();
base = result.get();
}
}
// If the base is a matrix type, try to create a new MatrixSubscriptExpr.
if (base->getType()->isMatrixType()) {
assert(ArgExprs.size() == 1);
if (CheckAndReportCommaError(ArgExprs.front()))
return ExprError();
return CreateBuiltinMatrixSubscriptExpr(base, ArgExprs.front(), nullptr,
rbLoc);
}
if (ArgExprs.size() == 1 && getLangOpts().CPlusPlus20) {
Expr *idx = ArgExprs[0];
if ((isa<BinaryOperator>(idx) && cast<BinaryOperator>(idx)->isCommaOp()) ||
(isa<CXXOperatorCallExpr>(idx) &&
cast<CXXOperatorCallExpr>(idx)->getOperator() == OO_Comma)) {
Diag(idx->getExprLoc(), diag::warn_deprecated_comma_subscript)
<< SourceRange(base->getBeginLoc(), rbLoc);
}
}
if (ArgExprs.size() == 1 &&
ArgExprs[0]->getType()->isNonOverloadPlaceholderType()) {
ExprResult result = CheckPlaceholderExpr(ArgExprs[0]);
if (result.isInvalid())
return ExprError();
ArgExprs[0] = result.get();
} else {
if (CheckArgsForPlaceholders(ArgExprs))
return ExprError();
}
// Build an unanalyzed expression if either operand is type-dependent.
if (getLangOpts().CPlusPlus && ArgExprs.size() == 1 &&
(base->isTypeDependent() ||
Expr::hasAnyTypeDependentArguments(ArgExprs)) &&
!isa<PackExpansionExpr>(ArgExprs[0])) {
return new (Context) ArraySubscriptExpr(
base, ArgExprs.front(),
getDependentArraySubscriptType(base, ArgExprs.front(), getASTContext()),
VK_LValue, OK_Ordinary, rbLoc);
}
// MSDN, property (C++)
// https://msdn.microsoft.com/en-us/library/yhfk0thd(v=vs.120).aspx
// This attribute can also be used in the declaration of an empty array in a
// class or structure definition. For example:
// __declspec(property(get=GetX, put=PutX)) int x[];
// The above statement indicates that x[] can be used with one or more array
// indices. In this case, i=p->x[a][b] will be turned into i=p->GetX(a, b),
// and p->x[a][b] = i will be turned into p->PutX(a, b, i);
if (IsMSPropertySubscript) {
assert(ArgExprs.size() == 1);
// Build MS property subscript expression if base is MS property reference
// or MS property subscript.
return new (Context)
MSPropertySubscriptExpr(base, ArgExprs.front(), Context.PseudoObjectTy,
VK_LValue, OK_Ordinary, rbLoc);
}
// Use C++ overloaded-operator rules if either operand has record
// type. The spec says to do this if either type is *overloadable*,
// but enum types can't declare subscript operators or conversion
// operators, so there's nothing interesting for overload resolution
// to do if there aren't any record types involved.
//
// ObjC pointers have their own subscripting logic that is not tied
// to overload resolution and so should not take this path.
if (getLangOpts().CPlusPlus && !base->getType()->isObjCObjectPointerType() &&
((base->getType()->isRecordType() ||
(ArgExprs.size() != 1 || isa<PackExpansionExpr>(ArgExprs[0]) ||
ArgExprs[0]->getType()->isRecordType())))) {
return CreateOverloadedArraySubscriptExpr(lbLoc, rbLoc, base, ArgExprs);
}
ExprResult Res =
CreateBuiltinArraySubscriptExpr(base, lbLoc, ArgExprs.front(), rbLoc);
if (!Res.isInvalid() && isa<ArraySubscriptExpr>(Res.get()))
CheckSubscriptAccessOfNoDeref(cast<ArraySubscriptExpr>(Res.get()));
return Res;
}
ExprResult Sema::tryConvertExprToType(Expr *E, QualType Ty) {
InitializedEntity Entity = InitializedEntity::InitializeTemporary(Ty);
InitializationKind Kind =
InitializationKind::CreateCopy(E->getBeginLoc(), SourceLocation());
InitializationSequence InitSeq(*this, Entity, Kind, E);
return InitSeq.Perform(*this, Entity, Kind, E);
}
ExprResult Sema::CreateBuiltinMatrixSubscriptExpr(Expr *Base, Expr *RowIdx,
Expr *ColumnIdx,
SourceLocation RBLoc) {
ExprResult BaseR = CheckPlaceholderExpr(Base);
if (BaseR.isInvalid())
return BaseR;
Base = BaseR.get();
ExprResult RowR = CheckPlaceholderExpr(RowIdx);
if (RowR.isInvalid())
return RowR;
RowIdx = RowR.get();
if (!ColumnIdx)
return new (Context) MatrixSubscriptExpr(
Base, RowIdx, ColumnIdx, Context.IncompleteMatrixIdxTy, RBLoc);
// Build an unanalyzed expression if any of the operands is type-dependent.
if (Base->isTypeDependent() || RowIdx->isTypeDependent() ||
ColumnIdx->isTypeDependent())
return new (Context) MatrixSubscriptExpr(Base, RowIdx, ColumnIdx,
Context.DependentTy, RBLoc);
ExprResult ColumnR = CheckPlaceholderExpr(ColumnIdx);
if (ColumnR.isInvalid())
return ColumnR;
ColumnIdx = ColumnR.get();
// Check that IndexExpr is an integer expression. If it is a constant
// expression, check that it is less than Dim (= the number of elements in the
// corresponding dimension).
auto IsIndexValid = [&](Expr *IndexExpr, unsigned Dim,
bool IsColumnIdx) -> Expr * {
if (!IndexExpr->getType()->isIntegerType() &&
!IndexExpr->isTypeDependent()) {
Diag(IndexExpr->getBeginLoc(), diag::err_matrix_index_not_integer)
<< IsColumnIdx;
return nullptr;
}
if (std::optional<llvm::APSInt> Idx =
IndexExpr->getIntegerConstantExpr(Context)) {
if ((*Idx < 0 || *Idx >= Dim)) {
Diag(IndexExpr->getBeginLoc(), diag::err_matrix_index_outside_range)
<< IsColumnIdx << Dim;
return nullptr;
}
}
ExprResult ConvExpr =
tryConvertExprToType(IndexExpr, Context.getSizeType());
assert(!ConvExpr.isInvalid() &&
"should be able to convert any integer type to size type");
return ConvExpr.get();
};
auto *MTy = Base->getType()->getAs<ConstantMatrixType>();
RowIdx = IsIndexValid(RowIdx, MTy->getNumRows(), false);
ColumnIdx = IsIndexValid(ColumnIdx, MTy->getNumColumns(), true);
if (!RowIdx || !ColumnIdx)
return ExprError();
return new (Context) MatrixSubscriptExpr(Base, RowIdx, ColumnIdx,
MTy->getElementType(), RBLoc);
}
void Sema::CheckAddressOfNoDeref(const Expr *E) {
ExpressionEvaluationContextRecord &LastRecord = ExprEvalContexts.back();
const Expr *StrippedExpr = E->IgnoreParenImpCasts();
// For expressions like `&(*s).b`, the base is what gets recorded and what
// should be checked.
const MemberExpr *Member = nullptr;
while ((Member = dyn_cast<MemberExpr>(StrippedExpr)) && !Member->isArrow())
StrippedExpr = Member->getBase()->IgnoreParenImpCasts();
LastRecord.PossibleDerefs.erase(StrippedExpr);
}
void Sema::CheckSubscriptAccessOfNoDeref(const ArraySubscriptExpr *E) {
if (isUnevaluatedContext())
return;
QualType ResultTy = E->getType();
ExpressionEvaluationContextRecord &LastRecord = ExprEvalContexts.back();
// Bail if the element is an array since it is not a memory access.
if (isa<ArrayType>(ResultTy))
return;
if (ResultTy->hasAttr(attr::NoDeref)) {
LastRecord.PossibleDerefs.insert(E);
return;
}
// Check if the base type is a pointer to a member access of a struct
// marked with noderef.
const Expr *Base = E->getBase();
QualType BaseTy = Base->getType();
if (!(isa<ArrayType>(BaseTy) || isa<PointerType>(BaseTy)))
// Not a pointer access
return;
const MemberExpr *Member = nullptr;
while ((Member = dyn_cast<MemberExpr>(Base->IgnoreParenCasts())) &&
Member->isArrow())
Base = Member->getBase();
if (const auto *Ptr = dyn_cast<PointerType>(Base->getType())) {
if (Ptr->getPointeeType()->hasAttr(attr::NoDeref))
LastRecord.PossibleDerefs.insert(E);
}
}
ExprResult
Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc,
Expr *Idx, SourceLocation RLoc) {
Expr *LHSExp = Base;
Expr *RHSExp = Idx;
ExprValueKind VK = VK_LValue;
ExprObjectKind OK = OK_Ordinary;
// Per C++ core issue 1213, the result is an xvalue if either operand is
// a non-lvalue array, and an lvalue otherwise.
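// For example, given 'using A = int[3];', the subscript 'A{}[0]' applies to
// a prvalue array and is therefore an xvalue in C++11 and later.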
if (getLangOpts().CPlusPlus11) {
for (auto *Op : {LHSExp, RHSExp}) {
Op = Op->IgnoreImplicit();
if (Op->getType()->isArrayType() && !Op->isLValue())
VK = VK_XValue;
}
}
// Perform default conversions.
if (!LHSExp->getType()->isSubscriptableVectorType()) {
ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp);
if (Result.isInvalid())
return ExprError();
LHSExp = Result.get();
}
ExprResult Result = DefaultFunctionArrayLvalueConversion(RHSExp);
if (Result.isInvalid())
return ExprError();
RHSExp = Result.get();
QualType LHSTy = LHSExp->getType(), RHSTy = RHSExp->getType();
// C99 6.5.2.1p2: the expression e1[e2] is by definition precisely equivalent
// to the expression *((e1)+(e2)). This means the array "Base" may actually be
// in the subscript position. As a result, we need to derive the array base
// and index from the expression types.
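// For example, '2["abc"]' is valid and equivalent to '"abc"[2]', so the
// pointer (or array) operand may appear on either side of the brackets.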
Expr *BaseExpr, *IndexExpr;
QualType ResultType;
if (LHSTy->isDependentType() || RHSTy->isDependentType()) {
BaseExpr = LHSExp;
IndexExpr = RHSExp;
ResultType =
getDependentArraySubscriptType(LHSExp, RHSExp, getASTContext());
} else if (const PointerType *PTy = LHSTy->getAs<PointerType>()) {
BaseExpr = LHSExp;
IndexExpr = RHSExp;
ResultType = PTy->getPointeeType();
} else if (const ObjCObjectPointerType *PTy =
LHSTy->getAs<ObjCObjectPointerType>()) {
BaseExpr = LHSExp;
IndexExpr = RHSExp;
// Use custom logic if this should be the pseudo-object subscript
// expression.
if (!LangOpts.isSubscriptPointerArithmetic())
return ObjC().BuildObjCSubscriptExpression(RLoc, BaseExpr, IndexExpr,
nullptr, nullptr);
ResultType = PTy->getPointeeType();
} else if (const PointerType *PTy = RHSTy->getAs<PointerType>()) {
// Handle the uncommon case of "123[Ptr]".
BaseExpr = RHSExp;
IndexExpr = LHSExp;
ResultType = PTy->getPointeeType();
} else if (const ObjCObjectPointerType *PTy =
RHSTy->getAs<ObjCObjectPointerType>()) {
// Handle the uncommon case of "123[Ptr]".
BaseExpr = RHSExp;
IndexExpr = LHSExp;
ResultType = PTy->getPointeeType();
if (!LangOpts.isSubscriptPointerArithmetic()) {
Diag(LLoc, diag::err_subscript_nonfragile_interface)
<< ResultType << BaseExpr->getSourceRange();
return ExprError();
}
} else if (LHSTy->isSubscriptableVectorType()) {
if (LHSTy->isBuiltinType() &&
LHSTy->getAs<BuiltinType>()->isSveVLSBuiltinType()) {
const BuiltinType *BTy = LHSTy->getAs<BuiltinType>();
if (BTy->isSVEBool())
return ExprError(Diag(LLoc, diag::err_subscript_svbool_t)
<< LHSExp->getSourceRange()
<< RHSExp->getSourceRange());
ResultType = BTy->getSveEltType(Context);
} else {
const VectorType *VTy = LHSTy->getAs<VectorType>();
ResultType = VTy->getElementType();
}
BaseExpr = LHSExp; // vectors: V[123]
IndexExpr = RHSExp;
// We apply C++ DR1213 to vector subscripting too.
if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) {
ExprResult Materialized = TemporaryMaterializationConversion(LHSExp);
if (Materialized.isInvalid())
return ExprError();
LHSExp = Materialized.get();
}
VK = LHSExp->getValueKind();
if (VK != VK_PRValue)
OK = OK_VectorComponent;
QualType BaseType = BaseExpr->getType();
Qualifiers BaseQuals = BaseType.getQualifiers();
Qualifiers MemberQuals = ResultType.getQualifiers();
Qualifiers Combined = BaseQuals + MemberQuals;
if (Combined != MemberQuals)
ResultType = Context.getQualifiedType(ResultType, Combined);
} else if (LHSTy->isArrayType()) {
// If we see an array that wasn't promoted by
// DefaultFunctionArrayLvalueConversion, it must be an array that
// wasn't promoted because of the C90 rule that doesn't
// allow promoting non-lvalue arrays. Warn, then
// force the promotion here.
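// e.g. in C90, `struct S { int a[4]; } f(void); ... f().a[0]` subscripts
// the non-lvalue array member `a`.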
Diag(LHSExp->getBeginLoc(), diag::ext_subscript_non_lvalue)
<< LHSExp->getSourceRange();
LHSExp = ImpCastExprToType(LHSExp, Context.getArrayDecayedType(LHSTy),
CK_ArrayToPointerDecay).get();
LHSTy = LHSExp->getType();
BaseExpr = LHSExp;
IndexExpr = RHSExp;
ResultType = LHSTy->castAs<PointerType>()->getPointeeType();
} else if (RHSTy->isArrayType()) {
// Same as previous, except for 123[f().a] case
Diag(RHSExp->getBeginLoc(), diag::ext_subscript_non_lvalue)
<< RHSExp->getSourceRange();
RHSExp = ImpCastExprToType(RHSExp, Context.getArrayDecayedType(RHSTy),
CK_ArrayToPointerDecay).get();
RHSTy = RHSExp->getType();
BaseExpr = RHSExp;
IndexExpr = LHSExp;
ResultType = RHSTy->castAs<PointerType>()->getPointeeType();
} else {
return ExprError(Diag(LLoc, diag::err_typecheck_subscript_value)
<< LHSExp->getSourceRange() << RHSExp->getSourceRange());
}
// C99 6.5.2.1p1
if (!IndexExpr->getType()->isIntegerType() && !IndexExpr->isTypeDependent())
return ExprError(Diag(LLoc, diag::err_typecheck_subscript_not_integer)
<< IndexExpr->getSourceRange());
if ((IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_S) ||
IndexExpr->getType()->isSpecificBuiltinType(BuiltinType::Char_U)) &&
!IndexExpr->isTypeDependent()) {
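// e.g. `buf[c]` with a plain `char c` variable warns below, while a
// non-negative constant such as `buf['0']` does not.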
std::optional<llvm::APSInt> IntegerConstantExpr =
IndexExpr->getIntegerConstantExpr(getASTContext());
if (!IntegerConstantExpr.has_value() ||
IntegerConstantExpr.value().isNegative())
Diag(LLoc, diag::warn_subscript_is_char) << IndexExpr->getSourceRange();
}
// C99 6.5.2.1p1: "shall have type "pointer to *object* type". Similarly,
// C++ [expr.sub]p1: The type "T" shall be a completely-defined object
// type. Note that Functions are not objects, and that (in C99 parlance)
// incomplete types are not object types.
if (ResultType->isFunctionType()) {
Diag(BaseExpr->getBeginLoc(), diag::err_subscript_function_type)
<< ResultType << BaseExpr->getSourceRange();
return ExprError();
}
if (ResultType->isVoidType() && !getLangOpts().CPlusPlus) {
// GNU extension: subscripting on pointer to void
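// e.g. `void *p; ... p[4];` is accepted; GNU arithmetic on `void *`
// behaves as if the pointee had size 1.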
Diag(LLoc, diag::ext_gnu_subscript_void_type)
<< BaseExpr->getSourceRange();
// C forbids expressions of unqualified void type from being l-values.
// See IsCForbiddenLValueType.
if (!ResultType.hasQualifiers())
VK = VK_PRValue;
} else if (!ResultType->isDependentType() &&
!ResultType.isWebAssemblyReferenceType() &&
RequireCompleteSizedType(
LLoc, ResultType,
diag::err_subscript_incomplete_or_sizeless_type, BaseExpr))
return ExprError();
assert(VK == VK_PRValue || LangOpts.CPlusPlus ||
!ResultType.isCForbiddenLValueType());
if (LHSExp->IgnoreParenImpCasts()->getType()->isVariablyModifiedType() &&
FunctionScopes.size() > 1) {
if (auto *TT =
LHSExp->IgnoreParenImpCasts()->getType()->getAs<TypedefType>()) {
for (auto I = FunctionScopes.rbegin(),
E = std::prev(FunctionScopes.rend());
I != E; ++I) {
auto *CSI = dyn_cast<CapturingScopeInfo>(*I);
if (CSI == nullptr)
break;
DeclContext *DC = nullptr;
if (auto *LSI = dyn_cast<LambdaScopeInfo>(CSI))
DC = LSI->CallOperator;
else if (auto *CRSI = dyn_cast<CapturedRegionScopeInfo>(CSI))
DC = CRSI->TheCapturedDecl;
else if (auto *BSI = dyn_cast<BlockScopeInfo>(CSI))
DC = BSI->TheDecl;
if (DC) {
if (DC->containsDecl(TT->getDecl()))
break;
captureVariablyModifiedType(
Context, LHSExp->IgnoreParenImpCasts()->getType(), CSI);
}
}
}
}
return new (Context)
ArraySubscriptExpr(LHSExp, RHSExp, ResultType, VK, OK, RLoc);
}
bool Sema::CheckCXXDefaultArgExpr(SourceLocation CallLoc, FunctionDecl *FD,
ParmVarDecl *Param, Expr *RewrittenInit,
bool SkipImmediateInvocations) {
if (Param->hasUnparsedDefaultArg()) {
assert(!RewrittenInit && "Should not have a rewritten init expression yet");
// If we've already cleared out the location for the default argument,
// that means we're parsing it right now.
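// e.g. `struct S { static int f(int = f()); };` calls `f` from within
// `f`'s own, still unparsed, default argument.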
if (!UnparsedDefaultArgLocs.count(Param)) {
Diag(Param->getBeginLoc(), diag::err_recursive_default_argument) << FD;
Diag(CallLoc, diag::note_recursive_default_argument_used_here);
Param->setInvalidDecl();
return true;
}
Diag(CallLoc, diag::err_use_of_default_argument_to_function_declared_later)
<< FD << cast<CXXRecordDecl>(FD->getDeclContext());
Diag(UnparsedDefaultArgLocs[Param],
diag::note_default_argument_declared_here);
return true;
}
if (Param->hasUninstantiatedDefaultArg()) {
assert(!RewrittenInit && "Should not have a rewritten init expression yet");
if (InstantiateDefaultArgument(CallLoc, FD, Param))
return true;
}
Expr *Init = RewrittenInit ? RewrittenInit : Param->getInit();
assert(Init && "default argument but no initializer?");
// If the default expression creates temporaries, we need to
// push them to the current stack of expression temporaries so they'll
// be properly destroyed.
// FIXME: We should really be rebuilding the default argument with new
// bound temporaries; see the comment in PR5810.
// We don't need to do that with block decls, though, because
// blocks in default argument expressions can never capture anything.
if (auto *InitWithCleanup = dyn_cast<ExprWithCleanups>(Init)) {
// Set the "needs cleanups" bit regardless of whether there are
// any explicit objects.
Cleanup.setExprNeedsCleanups(InitWithCleanup->cleanupsHaveSideEffects());
// Append all the objects to the cleanup list. Right now, this
// should always be a no-op, because blocks in default argument
// expressions should never be able to capture anything.
assert(!InitWithCleanup->getNumObjects() &&
"default argument expression has capturing blocks?");
}
// C++ [expr.const]p15.1:
// An expression or conversion is in an immediate function context if it is
// potentially evaluated and [...] its innermost enclosing non-block scope
// is a function parameter scope of an immediate function.
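// e.g. if `FD` is consteval, a call appearing in its default argument is
// itself in an immediate function context rather than being an immediate
// invocation at this point.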
EnterExpressionEvaluationContext EvalContext(
*this,
FD->isImmediateFunction()
? ExpressionEvaluationContext::ImmediateFunctionContext
: ExpressionEvaluationContext::PotentiallyEvaluated,
Param);
ExprEvalContexts.back().IsCurrentlyCheckingDefaultArgumentOrInitializer =
SkipImmediateInvocations;
runWithSufficientStackSpace(CallLoc, [&] {
MarkDeclarationsReferencedInExpr(Init, /*SkipLocalVariables=*/true);
});
return false;
}
struct ImmediateCallVisitor : public RecursiveASTVisitor<ImmediateCallVisitor> {
const ASTContext &Context;
ImmediateCallVisitor(const ASTContext &Ctx) : Context(Ctx) {}
bool HasImmediateCalls = false;
bool shouldVisitImplicitCode() const { return true; }
bool VisitCallExpr(CallExpr *E) {
if (const FunctionDecl *FD = E->getDirectCallee())
HasImmediateCalls |= FD->isImmediateFunction();
return RecursiveASTVisitor<ImmediateCallVisitor>::VisitStmt(E);
}
bool VisitCXXConstructExpr(CXXConstructExpr *E) {
if (const FunctionDecl *FD = E->getConstructor())
HasImmediateCalls |= FD->isImmediateFunction();
return RecursiveASTVisitor<ImmediateCallVisitor>::VisitStmt(E);
}
// SourceLocExprs are not immediate invocations,
// but a CXXDefaultInitExpr/CXXDefaultArgExpr containing a SourceLocExpr
// needs to be rebuilt so that it refers to the correct SourceLocation and
// DeclContext.
bool VisitSourceLocExpr(SourceLocExpr *E) {
HasImmediateCalls = true;
return RecursiveASTVisitor<ImmediateCallVisitor>::VisitStmt(E);
}
// A nested lambda might have parameters with immediate invocations
// in their default arguments.
// The compound statement is not visited (as it does not constitute a
// subexpression).
// FIXME: We should consider visiting and transforming captures
// with init expressions.
bool VisitLambdaExpr(LambdaExpr *E) {
return VisitCXXMethodDecl(E->getCallOperator());
}
bool VisitCXXDefaultArgExpr(CXXDefaultArgExpr *E) {
return TraverseStmt(E->getExpr());
}
bool VisitCXXDefaultInitExpr(CXXDefaultInitExpr *E) {
return TraverseStmt(E->getExpr());
}
};
struct EnsureImmediateInvocationInDefaultArgs
: TreeTransform<EnsureImmediateInvocationInDefaultArgs> {
EnsureImmediateInvocationInDefaultArgs(Sema &SemaRef)
: TreeTransform(SemaRef) {}
// Lambdas can only have immediate invocations in the default
// arguments of their parameters, which are transformed upon calling the
// closure. The body is not a subexpression, so we have nothing to do.
// FIXME: Immediate calls in capture initializers should be transformed.
ExprResult TransformLambdaExpr(LambdaExpr *E) { return E; }
ExprResult TransformBlockExpr(BlockExpr *E) { return E; }
// Make sure we don't rebuild the this pointer as it would
// cause it to incorrectly point to the outermost class
// in the case of nested struct initialization.
ExprResult TransformCXXThisExpr(CXXThisExpr *E) { return E; }
// Rewrite source locations to refer to the context in which they are used.
ExprResult TransformSourceLocExpr(SourceLocExpr *E) {
- if (E->getParentContext() == SemaRef.CurContext)
+ DeclContext *DC = E->getParentContext();
+ if (DC == SemaRef.CurContext)
return E;
- return getDerived().RebuildSourceLocExpr(E->getIdentKind(), E->getType(),
- E->getBeginLoc(), E->getEndLoc(),
- SemaRef.CurContext);
+
+ // FIXME: During instantiation, because the rebuild of default arguments
+ // is not always done in the context of the template instantiator,
+ // we run the risk of producing a dependent source location
+ // that would never be rebuilt.
+ // This usually happens during overload resolution, or in contexts
+ // where the value of the source location does not matter.
+ // However, we should find a better way to deal with the source locations
+ // of function templates.
+ if (!SemaRef.CurrentInstantiationScope ||
+ !SemaRef.CurContext->isDependentContext() || DC->isDependentContext())
+ DC = SemaRef.CurContext;
+
+ return getDerived().RebuildSourceLocExpr(
+ E->getIdentKind(), E->getType(), E->getBeginLoc(), E->getEndLoc(), DC);
}
};
ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
FunctionDecl *FD, ParmVarDecl *Param,
Expr *Init) {
assert(Param->hasDefaultArg() && "can't build nonexistent default arg");
bool NestedDefaultChecking = isCheckingDefaultArgumentOrInitializer();
bool InLifetimeExtendingContext = isInLifetimeExtendingContext();
std::optional<ExpressionEvaluationContextRecord::InitializationContext>
InitializationContext =
OutermostDeclarationWithDelayedImmediateInvocations();
if (!InitializationContext.has_value())
InitializationContext.emplace(CallLoc, Param, CurContext);
if (!Init && !Param->hasUnparsedDefaultArg()) {
// Mark that we are replacing a default argument first.
// If we are instantiating a template we won't have to
// retransform immediate calls.
// C++ [expr.const]p15.1:
// An expression or conversion is in an immediate function context if it
// is potentially evaluated and [...] its innermost enclosing non-block
// scope is a function parameter scope of an immediate function.
EnterExpressionEvaluationContext EvalContext(
*this,
FD->isImmediateFunction()
? ExpressionEvaluationContext::ImmediateFunctionContext
: ExpressionEvaluationContext::PotentiallyEvaluated,
Param);
if (Param->hasUninstantiatedDefaultArg()) {
if (InstantiateDefaultArgument(CallLoc, FD, Param))
return ExprError();
}
// CWG2631
// An immediate invocation that is not evaluated where it appears is
// evaluated and checked for whether it is a constant expression at the
// point where the enclosing initializer is used in a function call.
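// e.g. given `consteval int zero(); void g(int = zero());`, the call
// `zero()` is evaluated and checked when `g()` is called.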
ImmediateCallVisitor V(getASTContext());
if (!NestedDefaultChecking)
V.TraverseDecl(Param);
// Rewrite the call argument that was created from the corresponding
// parameter's default argument.
if (V.HasImmediateCalls || InLifetimeExtendingContext) {
if (V.HasImmediateCalls)
ExprEvalContexts.back().DelayedDefaultInitializationContext = {
CallLoc, Param, CurContext};
// Pass down lifetime extending flag, and collect temporaries in
// CreateMaterializeTemporaryExpr when we rewrite the call argument.
keepInLifetimeExtendingContext();
EnsureImmediateInvocationInDefaultArgs Immediate(*this);
ExprResult Res;
runWithSufficientStackSpace(CallLoc, [&] {
Res = Immediate.TransformInitializer(Param->getInit(),
/*NotCopy=*/false);
});
if (Res.isInvalid())
return ExprError();
Res = ConvertParamDefaultArgument(Param, Res.get(),
Res.get()->getBeginLoc());
if (Res.isInvalid())
return ExprError();
Init = Res.get();
}
}
if (CheckCXXDefaultArgExpr(
CallLoc, FD, Param, Init,
/*SkipImmediateInvocations=*/NestedDefaultChecking))
return ExprError();
return CXXDefaultArgExpr::Create(Context, InitializationContext->Loc, Param,
Init, InitializationContext->Context);
}
ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
assert(Field->hasInClassInitializer());
// If we might have already tried and failed to instantiate, don't try again.
if (Field->isInvalidDecl())
return ExprError();
CXXThisScopeRAII This(*this, Field->getParent(), Qualifiers());
auto *ParentRD = cast<CXXRecordDecl>(Field->getParent());
std::optional<ExpressionEvaluationContextRecord::InitializationContext>
InitializationContext =
OutermostDeclarationWithDelayedImmediateInvocations();
if (!InitializationContext.has_value())
InitializationContext.emplace(Loc, Field, CurContext);
Expr *Init = nullptr;
bool NestedDefaultChecking = isCheckingDefaultArgumentOrInitializer();
EnterExpressionEvaluationContext EvalContext(
*this, ExpressionEvaluationContext::PotentiallyEvaluated, Field);
if (!Field->getInClassInitializer()) {
// Maybe we haven't instantiated the in-class initializer. Go check the
// pattern FieldDecl to see if it has one.
if (isTemplateInstantiation(ParentRD->getTemplateSpecializationKind())) {
CXXRecordDecl *ClassPattern = ParentRD->getTemplateInstantiationPattern();
DeclContext::lookup_result Lookup =
ClassPattern->lookup(Field->getDeclName());
FieldDecl *Pattern = nullptr;
for (auto *L : Lookup) {
if ((Pattern = dyn_cast<FieldDecl>(L)))
break;
}
assert(Pattern && "We must have set the Pattern!");
if (!Pattern->hasInClassInitializer() ||
InstantiateInClassInitializer(Loc, Field, Pattern,
getTemplateInstantiationArgs(Field))) {
Field->setInvalidDecl();
return ExprError();
}
}
}
// CWG2631
// An immediate invocation that is not evaluated where it appears is
// evaluated and checked for whether it is a constant expression at the
// point where the enclosing initializer is used in a [...] a constructor
// definition, or an aggregate initialization.
ImmediateCallVisitor V(getASTContext());
if (!NestedDefaultChecking)
V.TraverseDecl(Field);
if (V.HasImmediateCalls) {
ExprEvalContexts.back().DelayedDefaultInitializationContext = {Loc, Field,
CurContext};
ExprEvalContexts.back().IsCurrentlyCheckingDefaultArgumentOrInitializer =
NestedDefaultChecking;
EnsureImmediateInvocationInDefaultArgs Immediate(*this);
ExprResult Res;
runWithSufficientStackSpace(Loc, [&] {
Res = Immediate.TransformInitializer(Field->getInClassInitializer(),
/*CXXDirectInit=*/false);
});
if (!Res.isInvalid())
Res = ConvertMemberDefaultInitExpression(Field, Res.get(), Loc);
if (Res.isInvalid()) {
Field->setInvalidDecl();
return ExprError();
}
Init = Res.get();
}
if (Field->getInClassInitializer()) {
Expr *E = Init ? Init : Field->getInClassInitializer();
if (!NestedDefaultChecking)
runWithSufficientStackSpace(Loc, [&] {
MarkDeclarationsReferencedInExpr(E, /*SkipLocalVariables=*/false);
});
// C++11 [class.base.init]p7:
// The initialization of each base and member constitutes a
// full-expression.
ExprResult Res = ActOnFinishFullExpr(E, /*DiscardedValue=*/false);
if (Res.isInvalid()) {
Field->setInvalidDecl();
return ExprError();
}
Init = Res.get();
return CXXDefaultInitExpr::Create(Context, InitializationContext->Loc,
Field, InitializationContext->Context,
Init);
}
// DR1351:
// If the brace-or-equal-initializer of a non-static data member
// invokes a defaulted default constructor of its class or of an
// enclosing class in a potentially evaluated subexpression, the
// program is ill-formed.
//
// This resolution is unworkable: the exception specification of the
// default constructor can be needed in an unevaluated context, in
// particular, in the operand of a noexcept-expression, and we can be
// unable to compute an exception specification for an enclosed class.
//
// Any attempt to resolve the exception specification of a defaulted default
// constructor before the initializer is lexically complete will ultimately
// come here at which point we can diagnose it.
RecordDecl *OutermostClass = ParentRD->getOuterLexicalRecordContext();
Diag(Loc, diag::err_default_member_initializer_not_yet_parsed)
<< OutermostClass << Field;
Diag(Field->getEndLoc(),
diag::note_default_member_initializer_not_yet_parsed);
// Recover by marking the field invalid, unless we're in a SFINAE context.
if (!isSFINAEContext())
Field->setInvalidDecl();
return ExprError();
}
Sema::VariadicCallType
Sema::getVariadicCallType(FunctionDecl *FDecl, const FunctionProtoType *Proto,
Expr *Fn) {
if (Proto && Proto->isVariadic()) {
if (isa_and_nonnull<CXXConstructorDecl>(FDecl))
return VariadicConstructor;
else if (Fn && Fn->getType()->isBlockPointerType())
return VariadicBlock;
else if (FDecl) {
if (CXXMethodDecl *Method = dyn_cast_or_null<CXXMethodDecl>(FDecl))
if (Method->isInstance())
return VariadicMethod;
} else if (Fn && Fn->getType() == Context.BoundMemberTy)
return VariadicMethod;
return VariadicFunction;
}
return VariadicDoesNotApply;
}
namespace {
class FunctionCallCCC final : public FunctionCallFilterCCC {
public:
FunctionCallCCC(Sema &SemaRef, const IdentifierInfo *FuncName,
unsigned NumArgs, MemberExpr *ME)
: FunctionCallFilterCCC(SemaRef, NumArgs, false, ME),
FunctionName(FuncName) {}
bool ValidateCandidate(const TypoCorrection &candidate) override {
if (!candidate.getCorrectionSpecifier() ||
candidate.getCorrectionAsIdentifierInfo() != FunctionName) {
return false;
}
return FunctionCallFilterCCC::ValidateCandidate(candidate);
}
std::unique_ptr<CorrectionCandidateCallback> clone() override {
return std::make_unique<FunctionCallCCC>(*this);
}
private:
const IdentifierInfo *const FunctionName;
};
} // namespace
static TypoCorrection TryTypoCorrectionForCall(Sema &S, Expr *Fn,
FunctionDecl *FDecl,
ArrayRef<Expr *> Args) {
MemberExpr *ME = dyn_cast<MemberExpr>(Fn);
DeclarationName FuncName = FDecl->getDeclName();
SourceLocation NameLoc = ME ? ME->getMemberLoc() : Fn->getBeginLoc();
FunctionCallCCC CCC(S, FuncName.getAsIdentifierInfo(), Args.size(), ME);
if (TypoCorrection Corrected = S.CorrectTypo(
DeclarationNameInfo(FuncName, NameLoc), Sema::LookupOrdinaryName,
S.getScopeForContext(S.CurContext), nullptr, CCC,
Sema::CTK_ErrorRecovery)) {
if (NamedDecl *ND = Corrected.getFoundDecl()) {
if (Corrected.isOverloaded()) {
OverloadCandidateSet OCS(NameLoc, OverloadCandidateSet::CSK_Normal);
OverloadCandidateSet::iterator Best;
for (NamedDecl *CD : Corrected) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(CD))
S.AddOverloadCandidate(FD, DeclAccessPair::make(FD, AS_none), Args,
OCS);
}
switch (OCS.BestViableFunction(S, NameLoc, Best)) {
case OR_Success:
ND = Best->FoundDecl;
Corrected.setCorrectionDecl(ND);
break;
default:
break;
}
}
ND = ND->getUnderlyingDecl();
if (isa<ValueDecl>(ND) || isa<FunctionTemplateDecl>(ND))
return Corrected;
}
}
return TypoCorrection();
}
// [C++26] [expr.unary.op]/4:
// A pointer to member is only formed when an explicit & is used and
// its operand is a qualified-id not enclosed in parentheses.
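// e.g. `&C::f` forms a pointer to member, but `&(C::f)` does not, because
// the qualified-id is enclosed in parentheses.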
static bool isParenthesizedAndQualifiedAddressOfExpr(Expr *Fn) {
if (!isa<ParenExpr>(Fn))
return false;
Fn = Fn->IgnoreParens();
auto *UO = dyn_cast<UnaryOperator>(Fn);
if (!UO || UO->getOpcode() != clang::UO_AddrOf)
return false;
if (auto *DRE = dyn_cast<DeclRefExpr>(UO->getSubExpr()->IgnoreParens())) {
return DRE->hasQualifier();
}
if (auto *OVL = dyn_cast<OverloadExpr>(UO->getSubExpr()->IgnoreParens()))
return OVL->getQualifier();
return false;
}
bool
Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn,
FunctionDecl *FDecl,
const FunctionProtoType *Proto,
ArrayRef<Expr *> Args,
SourceLocation RParenLoc,
bool IsExecConfig) {
// Bail out early if calling a builtin with custom typechecking.
if (FDecl)
if (unsigned ID = FDecl->getBuiltinID())
if (Context.BuiltinInfo.hasCustomTypechecking(ID))
return false;
// C99 6.5.2.2p7 - the arguments are implicitly converted, as if by
// assignment, to the types of the corresponding parameter, ...
bool AddressOf = isParenthesizedAndQualifiedAddressOfExpr(Fn);
bool HasExplicitObjectParameter =
!AddressOf && FDecl && FDecl->hasCXXExplicitFunctionObjectParameter();
unsigned ExplicitObjectParameterOffset = HasExplicitObjectParameter ? 1 : 0;
unsigned NumParams = Proto->getNumParams();
bool Invalid = false;
unsigned MinArgs = FDecl ? FDecl->getMinRequiredArguments() : NumParams;
unsigned FnKind = Fn->getType()->isBlockPointerType()
? 1 /* block */
: (IsExecConfig ? 3 /* kernel function (exec config) */
: 0 /* function */);
// If too few arguments are available (and we don't have default
// arguments for the remaining parameters), don't make the call.
if (Args.size() < NumParams) {
if (Args.size() < MinArgs) {
TypoCorrection TC;
if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) {
unsigned diag_id =
MinArgs == NumParams && !Proto->isVariadic()
? diag::err_typecheck_call_too_few_args_suggest
: diag::err_typecheck_call_too_few_args_at_least_suggest;
diagnoseTypo(
TC, PDiag(diag_id)
<< FnKind << MinArgs - ExplicitObjectParameterOffset
<< static_cast<unsigned>(Args.size()) -
ExplicitObjectParameterOffset
<< HasExplicitObjectParameter << TC.getCorrectionRange());
} else if (MinArgs - ExplicitObjectParameterOffset == 1 && FDecl &&
FDecl->getParamDecl(ExplicitObjectParameterOffset)
->getDeclName())
Diag(RParenLoc,
MinArgs == NumParams && !Proto->isVariadic()
? diag::err_typecheck_call_too_few_args_one
: diag::err_typecheck_call_too_few_args_at_least_one)
<< FnKind << FDecl->getParamDecl(ExplicitObjectParameterOffset)
<< HasExplicitObjectParameter << Fn->getSourceRange();
else
Diag(RParenLoc, MinArgs == NumParams && !Proto->isVariadic()
? diag::err_typecheck_call_too_few_args
: diag::err_typecheck_call_too_few_args_at_least)
<< FnKind << MinArgs - ExplicitObjectParameterOffset
<< static_cast<unsigned>(Args.size()) -
ExplicitObjectParameterOffset
<< HasExplicitObjectParameter << Fn->getSourceRange();
// Emit the location of the prototype.
if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig)
Diag(FDecl->getLocation(), diag::note_callee_decl)
<< FDecl << FDecl->getParametersSourceRange();
return true;
}
// We reserve space for the default arguments when we create
// the call expression, before calling ConvertArgumentsForCall.
assert((Call->getNumArgs() == NumParams) &&
"We should have reserved space for the default arguments before!");
}
// If too many are passed and not variadic, error on the extras and drop
// them.
if (Args.size() > NumParams) {
if (!Proto->isVariadic()) {
TypoCorrection TC;
if (FDecl && (TC = TryTypoCorrectionForCall(*this, Fn, FDecl, Args))) {
unsigned diag_id =
MinArgs == NumParams && !Proto->isVariadic()
? diag::err_typecheck_call_too_many_args_suggest
: diag::err_typecheck_call_too_many_args_at_most_suggest;
diagnoseTypo(
TC, PDiag(diag_id)
<< FnKind << NumParams - ExplicitObjectParameterOffset
<< static_cast<unsigned>(Args.size()) -
ExplicitObjectParameterOffset
<< HasExplicitObjectParameter << TC.getCorrectionRange());
} else if (NumParams - ExplicitObjectParameterOffset == 1 && FDecl &&
FDecl->getParamDecl(ExplicitObjectParameterOffset)
->getDeclName())
Diag(Args[NumParams]->getBeginLoc(),
MinArgs == NumParams
? diag::err_typecheck_call_too_many_args_one
: diag::err_typecheck_call_too_many_args_at_most_one)
<< FnKind << FDecl->getParamDecl(ExplicitObjectParameterOffset)
<< static_cast<unsigned>(Args.size()) -
ExplicitObjectParameterOffset
<< HasExplicitObjectParameter << Fn->getSourceRange()
<< SourceRange(Args[NumParams]->getBeginLoc(),
Args.back()->getEndLoc());
else
Diag(Args[NumParams]->getBeginLoc(),
MinArgs == NumParams
? diag::err_typecheck_call_too_many_args
: diag::err_typecheck_call_too_many_args_at_most)
<< FnKind << NumParams - ExplicitObjectParameterOffset
<< static_cast<unsigned>(Args.size()) -
ExplicitObjectParameterOffset
<< HasExplicitObjectParameter << Fn->getSourceRange()
<< SourceRange(Args[NumParams]->getBeginLoc(),
Args.back()->getEndLoc());
// Emit the location of the prototype.
if (!TC && FDecl && !FDecl->getBuiltinID() && !IsExecConfig)
Diag(FDecl->getLocation(), diag::note_callee_decl)
<< FDecl << FDecl->getParametersSourceRange();
// This deletes the extra arguments.
Call->shrinkNumArgs(NumParams);
return true;
}
}
SmallVector<Expr *, 8> AllArgs;
VariadicCallType CallType = getVariadicCallType(FDecl, Proto, Fn);
Invalid = GatherArgumentsForCall(Call->getBeginLoc(), FDecl, Proto, 0, Args,
AllArgs, CallType);
if (Invalid)
return true;
unsigned TotalNumArgs = AllArgs.size();
for (unsigned i = 0; i < TotalNumArgs; ++i)
Call->setArg(i, AllArgs[i]);
Call->computeDependence();
return false;
}
bool Sema::GatherArgumentsForCall(SourceLocation CallLoc, FunctionDecl *FDecl,
const FunctionProtoType *Proto,
unsigned FirstParam, ArrayRef<Expr *> Args,
SmallVectorImpl<Expr *> &AllArgs,
VariadicCallType CallType, bool AllowExplicit,
bool IsListInitialization) {
unsigned NumParams = Proto->getNumParams();
bool Invalid = false;
size_t ArgIx = 0;
// Continue to check argument types (even if we have too few/many args).
for (unsigned i = FirstParam; i < NumParams; i++) {
QualType ProtoArgType = Proto->getParamType(i);
Expr *Arg;
ParmVarDecl *Param = FDecl ? FDecl->getParamDecl(i) : nullptr;
if (ArgIx < Args.size()) {
Arg = Args[ArgIx++];
if (RequireCompleteType(Arg->getBeginLoc(), ProtoArgType,
diag::err_call_incomplete_argument, Arg))
return true;
// Strip the unbridged-cast placeholder expression off, if applicable.
bool CFAudited = false;
if (Arg->getType() == Context.ARCUnbridgedCastTy &&
FDecl && FDecl->hasAttr<CFAuditedTransferAttr>() &&
(!Param || !Param->hasAttr<CFConsumedAttr>()))
Arg = ObjC().stripARCUnbridgedCast(Arg);
else if (getLangOpts().ObjCAutoRefCount &&
FDecl && FDecl->hasAttr<CFAuditedTransferAttr>() &&
(!Param || !Param->hasAttr<CFConsumedAttr>()))
CFAudited = true;
if (Proto->getExtParameterInfo(i).isNoEscape() &&
ProtoArgType->isBlockPointerType())
if (auto *BE = dyn_cast<BlockExpr>(Arg->IgnoreParenNoopCasts(Context)))
BE->getBlockDecl()->setDoesNotEscape();
InitializedEntity Entity =
Param ? InitializedEntity::InitializeParameter(Context, Param,
ProtoArgType)
: InitializedEntity::InitializeParameter(
Context, ProtoArgType, Proto->isParamConsumed(i));
// Remember that parameter belongs to a CF audited API.
if (CFAudited)
Entity.setParameterCFAudited();
ExprResult ArgE = PerformCopyInitialization(
Entity, SourceLocation(), Arg, IsListInitialization, AllowExplicit);
if (ArgE.isInvalid())
return true;
Arg = ArgE.getAs<Expr>();
} else {
assert(Param && "can't use default arguments without a known callee");
ExprResult ArgExpr = BuildCXXDefaultArgExpr(CallLoc, FDecl, Param);
if (ArgExpr.isInvalid())
return true;
Arg = ArgExpr.getAs<Expr>();
}
// Check for array bounds violations for each argument to the call. This
// check only triggers warnings when the argument isn't a more complex Expr
// with its own checking, such as a BinaryOperator.
CheckArrayAccess(Arg);
// Check for violations of C99 static array rules (C99 6.7.5.3p7).
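// e.g. `void f(int a[static 4]);` called with an `int[2]` argument is
// diagnosed below in CheckStaticArrayArgument.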
CheckStaticArrayArgument(CallLoc, Param, Arg);
AllArgs.push_back(Arg);
}
// If this is a variadic call, handle args passed through "...".
if (CallType != VariadicDoesNotApply) {
// Assume that extern "C" functions with variadic arguments that
// return __unknown_anytype aren't *really* variadic.
if (Proto->getReturnType() == Context.UnknownAnyTy && FDecl &&
FDecl->isExternC()) {
for (Expr *A : Args.slice(ArgIx)) {
QualType paramType; // ignored
ExprResult arg = checkUnknownAnyArg(CallLoc, A, paramType);
Invalid |= arg.isInvalid();
AllArgs.push_back(arg.get());
}
// Otherwise do argument promotion, (C99 6.5.2.2p7).
} else {
for (Expr *A : Args.slice(ArgIx)) {
ExprResult Arg = DefaultVariadicArgumentPromotion(A, CallType, FDecl);
Invalid |= Arg.isInvalid();
AllArgs.push_back(Arg.get());
}
}
// Check for array bounds violations.
for (Expr *A : Args.slice(ArgIx))
CheckArrayAccess(A);
}
return Invalid;
}
static void DiagnoseCalleeStaticArrayParam(Sema &S, ParmVarDecl *PVD) {
TypeLoc TL = PVD->getTypeSourceInfo()->getTypeLoc();
if (DecayedTypeLoc DTL = TL.getAs<DecayedTypeLoc>())
TL = DTL.getOriginalLoc();
if (ArrayTypeLoc ATL = TL.getAs<ArrayTypeLoc>())
S.Diag(PVD->getLocation(), diag::note_callee_static_array)
<< ATL.getLocalSourceRange();
}
void
Sema::CheckStaticArrayArgument(SourceLocation CallLoc,
ParmVarDecl *Param,
const Expr *ArgExpr) {
// Static array parameters are not supported in C++.
if (!Param || getLangOpts().CPlusPlus)
return;
QualType OrigTy = Param->getOriginalType();
const ArrayType *AT = Context.getAsArrayType(OrigTy);
if (!AT || AT->getSizeModifier() != ArraySizeModifier::Static)
return;
if (ArgExpr->isNullPointerConstant(Context,
Expr::NPC_NeverValueDependent)) {
Diag(CallLoc, diag::warn_null_arg) << ArgExpr->getSourceRange();
DiagnoseCalleeStaticArrayParam(*this, Param);
return;
}
const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT);
if (!CAT)
return;
const ConstantArrayType *ArgCAT =
Context.getAsConstantArrayType(ArgExpr->IgnoreParenCasts()->getType());
if (!ArgCAT)
return;
if (getASTContext().hasSameUnqualifiedType(CAT->getElementType(),
ArgCAT->getElementType())) {
if (ArgCAT->getSize().ult(CAT->getSize())) {
Diag(CallLoc, diag::warn_static_array_too_small)
<< ArgExpr->getSourceRange() << (unsigned)ArgCAT->getZExtSize()
<< (unsigned)CAT->getZExtSize() << 0;
DiagnoseCalleeStaticArrayParam(*this, Param);
}
return;
}
std::optional<CharUnits> ArgSize =
getASTContext().getTypeSizeInCharsIfKnown(ArgCAT);
std::optional<CharUnits> ParmSize =
getASTContext().getTypeSizeInCharsIfKnown(CAT);
if (ArgSize && ParmSize && *ArgSize < *ParmSize) {
Diag(CallLoc, diag::warn_static_array_too_small)
<< ArgExpr->getSourceRange() << (unsigned)ArgSize->getQuantity()
<< (unsigned)ParmSize->getQuantity() << 1;
DiagnoseCalleeStaticArrayParam(*this, Param);
}
}
/// Given a function expression of unknown-any type, try to rebuild it
/// to have a function type.
static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *fn);
/// Is the given type a placeholder that we need to lower out
/// immediately during argument processing?
static bool isPlaceholderToRemoveAsArg(QualType type) {
// Placeholders are never sugared.
const BuiltinType *placeholder = dyn_cast<BuiltinType>(type);
if (!placeholder) return false;
switch (placeholder->getKind()) {
// Ignore all the non-placeholder types.
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLExtensionTypes.def"
// In practice we'll never use this, since all SVE types are sugared
// via TypedefTypes rather than exposed directly as BuiltinTypes.
#define SVE_TYPE(Name, Id, SingletonId) \
case BuiltinType::Id:
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id:
#include "clang/Basic/PPCTypes.def"
#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/RISCVVTypes.def"
#define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/WebAssemblyReferenceTypes.def"
#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/AMDGPUTypes.def"
#define PLACEHOLDER_TYPE(ID, SINGLETON_ID)
#define BUILTIN_TYPE(ID, SINGLETON_ID) case BuiltinType::ID:
#include "clang/AST/BuiltinTypes.def"
return false;
case BuiltinType::UnresolvedTemplate:
// We cannot lower out overload sets; they might validly be resolved
// by the call machinery.
case BuiltinType::Overload:
return false;
// Unbridged casts in ARC can be handled in some call positions and
// should be left in place.
case BuiltinType::ARCUnbridgedCast:
return false;
// Pseudo-objects should be converted as soon as possible.
case BuiltinType::PseudoObject:
return true;
// The debugger mode could theoretically, but currently does not, try
// to resolve unknown-typed arguments based on known parameter types.
case BuiltinType::UnknownAny:
return true;
// These are always invalid as call arguments and should be reported.
case BuiltinType::BoundMember:
case BuiltinType::BuiltinFn:
case BuiltinType::IncompleteMatrixIdx:
case BuiltinType::ArraySection:
case BuiltinType::OMPArrayShaping:
case BuiltinType::OMPIterator:
return true;
}
llvm_unreachable("bad builtin type kind");
}
bool Sema::CheckArgsForPlaceholders(MultiExprArg args) {
// Apply this processing to all the arguments at once instead of
// dying at the first failure.
bool hasInvalid = false;
for (size_t i = 0, e = args.size(); i != e; i++) {
if (isPlaceholderToRemoveAsArg(args[i]->getType())) {
ExprResult result = CheckPlaceholderExpr(args[i]);
if (result.isInvalid()) hasInvalid = true;
else args[i] = result.get();
}
}
return hasInvalid;
}
/// If a builtin function has a pointer argument with no explicit address
/// space, then it should be able to accept a pointer to any address
/// space as input. In order to do this, we need to replace the
/// standard builtin declaration with one that uses the same address space
/// as the call.
///
/// \returns nullptr if this builtin is not a candidate for a rewrite,
/// i.e. it does not contain any pointer arguments without
/// an address space qualifier. Otherwise the rewritten
/// FunctionDecl is returned.
/// TODO: Handle pointer return types.
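///
/// For illustration: an OpenCL builtin declared as taking `int *` that is
/// called with a `__global int *` argument gets a rewritten declaration
/// whose parameter type is `__global int *`.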
static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
FunctionDecl *FDecl,
MultiExprArg ArgExprs) {
QualType DeclType = FDecl->getType();
const FunctionProtoType *FT = dyn_cast<FunctionProtoType>(DeclType);
if (!Context.BuiltinInfo.hasPtrArgsOrResult(FDecl->getBuiltinID()) || !FT ||
ArgExprs.size() < FT->getNumParams())
return nullptr;
bool NeedsNewDecl = false;
unsigned i = 0;
SmallVector<QualType, 8> OverloadParams;
for (QualType ParamType : FT->param_types()) {
// Convert array arguments to pointer to simplify type lookup.
ExprResult ArgRes =
Sema->DefaultFunctionArrayLvalueConversion(ArgExprs[i++]);
if (ArgRes.isInvalid())
return nullptr;
Expr *Arg = ArgRes.get();
QualType ArgType = Arg->getType();
if (!ParamType->isPointerType() || ParamType.hasAddressSpace() ||
!ArgType->isPointerType() ||
!ArgType->getPointeeType().hasAddressSpace() ||
isPtrSizeAddressSpace(ArgType->getPointeeType().getAddressSpace())) {
OverloadParams.push_back(ParamType);
continue;
}
QualType PointeeType = ParamType->getPointeeType();
if (PointeeType.hasAddressSpace())
continue;
NeedsNewDecl = true;
LangAS AS = ArgType->getPointeeType().getAddressSpace();
PointeeType = Context.getAddrSpaceQualType(PointeeType, AS);
OverloadParams.push_back(Context.getPointerType(PointeeType));
}
if (!NeedsNewDecl)
return nullptr;
FunctionProtoType::ExtProtoInfo EPI;
EPI.Variadic = FT->isVariadic();
QualType OverloadTy = Context.getFunctionType(FT->getReturnType(),
OverloadParams, EPI);
DeclContext *Parent = FDecl->getParent();
FunctionDecl *OverloadDecl = FunctionDecl::Create(
Context, Parent, FDecl->getLocation(), FDecl->getLocation(),
FDecl->getIdentifier(), OverloadTy,
/*TInfo=*/nullptr, SC_Extern, Sema->getCurFPFeatures().isFPConstrained(),
false,
/*hasPrototype=*/true);
SmallVector<ParmVarDecl*, 16> Params;
FT = cast<FunctionProtoType>(OverloadTy);
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
QualType ParamType = FT->getParamType(i);
ParmVarDecl *Parm =
ParmVarDecl::Create(Context, OverloadDecl, SourceLocation(),
SourceLocation(), nullptr, ParamType,
/*TInfo=*/nullptr, SC_None, nullptr);
Parm->setScopeInfo(0, i);
Params.push_back(Parm);
}
OverloadDecl->setParams(Params);
Sema->mergeDeclAttributes(OverloadDecl, FDecl);
return OverloadDecl;
}
static void checkDirectCallValidity(Sema &S, const Expr *Fn,
FunctionDecl *Callee,
MultiExprArg ArgExprs) {
// `Callee` (when called with ArgExprs) may be ill-formed. enable_if (and
// similar attributes) really don't like it when functions are called with an
// invalid number of args.
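// e.g. `void f(int n) __attribute__((enable_if(n > 0, "positive only")));`
// called as `f(0)` is diagnosed here as having no viable function,
// quoting the attribute's condition and message.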
if (S.TooManyArguments(Callee->getNumParams(), ArgExprs.size(),
/*PartialOverloading=*/false) &&
!Callee->isVariadic())
return;
if (Callee->getMinRequiredArguments() > ArgExprs.size())
return;
if (const EnableIfAttr *Attr =
S.CheckEnableIf(Callee, Fn->getBeginLoc(), ArgExprs, true)) {
S.Diag(Fn->getBeginLoc(),
isa<CXXMethodDecl>(Callee)
? diag::err_ovl_no_viable_member_function_in_call
: diag::err_ovl_no_viable_function_in_call)
<< Callee << Callee->getSourceRange();
S.Diag(Callee->getLocation(),
diag::note_ovl_candidate_disabled_by_function_cond_attr)
<< Attr->getCond()->getSourceRange() << Attr->getMessage();
return;
}
}
static bool enclosingClassIsRelatedToClassInWhichMembersWereFound(
const UnresolvedMemberExpr *const UME, Sema &S) {
const auto GetFunctionLevelDCIfCXXClass =
[](Sema &S) -> const CXXRecordDecl * {
const DeclContext *const DC = S.getFunctionLevelDeclContext();
if (!DC || !DC->getParent())
return nullptr;
// If the call to some member function was made from within a member
// function body 'M', return 'M's parent.
if (const auto *MD = dyn_cast<CXXMethodDecl>(DC))
return MD->getParent()->getCanonicalDecl();
// else the call was made from within a default member initializer of a
// class, so return the class.
if (const auto *RD = dyn_cast<CXXRecordDecl>(DC))
return RD->getCanonicalDecl();
return nullptr;
};
// If our DeclContext is neither a member function nor a class (in the
// case of a lambda in a default member initializer), we can't have an
// enclosing 'this'.
const CXXRecordDecl *const CurParentClass = GetFunctionLevelDCIfCXXClass(S);
if (!CurParentClass)
return false;
// The naming class for implicit member functions call is the class in which
// name lookup starts.
const CXXRecordDecl *const NamingClass =
UME->getNamingClass()->getCanonicalDecl();
assert(NamingClass && "Must have naming class even for implicit access");
// Return true if the unresolved member functions were found in a 'naming
// class' that is related to (the same as, or a base of) the class that
// contains the member function that itself contained the implicit member
// access.
return CurParentClass == NamingClass ||
CurParentClass->isDerivedFrom(NamingClass);
}
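// For illustration: in `struct S { void g(int); template <class T>
// void f(T t) { [=] { g(t); }; } };`, the dependent call `g(t)` inside the
// lambda is an implicit member access that may require capturing 'this'.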
static void
tryImplicitlyCaptureThisIfImplicitMemberFunctionAccessWithDependentArgs(
Sema &S, const UnresolvedMemberExpr *const UME, SourceLocation CallLoc) {
if (!UME)
return;
LambdaScopeInfo *const CurLSI = S.getCurLambda();
// Only try to implicitly capture 'this' within a C++ lambda if it hasn't
// already been captured and this is an implicit member function call (if
// it isn't, an attempt to capture 'this' should already have been made).
if (!CurLSI || CurLSI->ImpCaptureStyle == CurLSI->ImpCap_None ||
!UME->isImplicitAccess() || CurLSI->isCXXThisCaptured())
return;
// Check if the naming class in which the unresolved members were found is
// related (same as or is a base of) to the enclosing class.
if (!enclosingClassIsRelatedToClassInWhichMembersWereFound(UME, S))
return;
DeclContext *EnclosingFunctionCtx = S.CurContext->getParent()->getParent();
// If the enclosing function is not dependent, then this lambda is
// capture ready, so if we can capture this, do so.
if (!EnclosingFunctionCtx->isDependentContext()) {
// If the current lambda and all enclosing lambdas can capture 'this' -
// then go ahead and capture 'this' (since our unresolved overload set
// contains at least one non-static member function).
if (!S.CheckCXXThisCapture(CallLoc, /*Explicit*/ false, /*Diagnose*/ false))
S.CheckCXXThisCapture(CallLoc);
} else if (S.CurContext->isDependentContext()) {
// ... since this is an implicit member reference, that might potentially
// involve a 'this' capture, mark 'this' for potential capture in
// enclosing lambdas.
if (CurLSI->ImpCaptureStyle != CurLSI->ImpCap_None)
CurLSI->addPotentialThisCapture(CallLoc);
}
}
// Once a call is fully resolved, warn for unqualified calls to specific
// C++ standard functions, like move and forward.
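// e.g. an unqualified `move(x)` that resolves to `std::move` is warned
// about, with a fix-it inserting the `std::` qualifier.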
static void DiagnoseUnqualifiedCallsToStdFunctions(Sema &S,
const CallExpr *Call) {
// We are only checking unary move and forward so exit early here.
if (Call->getNumArgs() != 1)
return;
const Expr *E = Call->getCallee()->IgnoreParenImpCasts();
if (!E || isa<UnresolvedLookupExpr>(E))
return;
const DeclRefExpr *DRE = dyn_cast_if_present<DeclRefExpr>(E);
if (!DRE || !DRE->getLocation().isValid())
return;
if (DRE->getQualifier())
return;
const FunctionDecl *FD = Call->getDirectCallee();
if (!FD)
return;
// Only warn for some functions deemed more frequent or problematic.
unsigned BuiltinID = FD->getBuiltinID();
if (BuiltinID != Builtin::BImove && BuiltinID != Builtin::BIforward)
return;
S.Diag(DRE->getLocation(), diag::warn_unqualified_call_to_std_cast_function)
<< FD->getQualifiedNameAsString()
<< FixItHint::CreateInsertion(DRE->getLocation(), "std::");
}
ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
MultiExprArg ArgExprs, SourceLocation RParenLoc,
Expr *ExecConfig) {
ExprResult Call =
BuildCallExpr(Scope, Fn, LParenLoc, ArgExprs, RParenLoc, ExecConfig,
/*IsExecConfig=*/false, /*AllowRecovery=*/true);
if (Call.isInvalid())
return Call;
// Diagnose uses of the C++20 "ADL-only template-id call" feature in earlier
// language modes.
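// e.g. `f<int>(x)` where no `f` is visible at the call site and `f` is
// found only by ADL is valid C++20 but an extension in earlier modes.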
if (const auto *ULE = dyn_cast<UnresolvedLookupExpr>(Fn);
ULE && ULE->hasExplicitTemplateArgs() &&
ULE->decls_begin() == ULE->decls_end()) {
Diag(Fn->getExprLoc(), getLangOpts().CPlusPlus20
? diag::warn_cxx17_compat_adl_only_template_id
: diag::ext_adl_only_template_id)
<< ULE->getName();
}
if (LangOpts.OpenMP)
Call = OpenMP().ActOnOpenMPCall(Call, Scope, LParenLoc, ArgExprs, RParenLoc,
ExecConfig);
if (LangOpts.CPlusPlus) {
if (const auto *CE = dyn_cast<CallExpr>(Call.get()))
DiagnoseUnqualifiedCallsToStdFunctions(*this, CE);
// If we previously found that the id-expression of this call refers to a
// consteval function but the call is dependent, we should not treat it as
// an invalid immediate call.
if (auto *DRE = dyn_cast<DeclRefExpr>(Fn->IgnoreParens());
DRE && Call.get()->isValueDependent()) {
currentEvaluationContext().ReferenceToConsteval.erase(DRE);
}
}
return Call;
}
ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
MultiExprArg ArgExprs, SourceLocation RParenLoc,
Expr *ExecConfig, bool IsExecConfig,
bool AllowRecovery) {
// Since this might be a postfix expression, get rid of ParenListExprs.
ExprResult Result = MaybeConvertParenListExprToParenExpr(Scope, Fn);
if (Result.isInvalid()) return ExprError();
Fn = Result.get();
if (CheckArgsForPlaceholders(ArgExprs))
return ExprError();
if (getLangOpts().CPlusPlus) {
// If this is a pseudo-destructor expression, build the call immediately.
if (isa<CXXPseudoDestructorExpr>(Fn)) {
if (!ArgExprs.empty()) {
// Pseudo-destructor calls should not have any arguments.
Diag(Fn->getBeginLoc(), diag::err_pseudo_dtor_call_with_args)
<< FixItHint::CreateRemoval(
SourceRange(ArgExprs.front()->getBeginLoc(),
ArgExprs.back()->getEndLoc()));
}
return CallExpr::Create(Context, Fn, /*Args=*/{}, Context.VoidTy,
VK_PRValue, RParenLoc, CurFPFeatureOverrides());
}
if (Fn->getType() == Context.PseudoObjectTy) {
ExprResult result = CheckPlaceholderExpr(Fn);
if (result.isInvalid()) return ExprError();
Fn = result.get();
}
// Determine whether this is a dependent call inside a C++ template,
// in which case we won't do any semantic analysis now.
if (Fn->isTypeDependent() || Expr::hasAnyTypeDependentArguments(ArgExprs)) {
if (ExecConfig) {
return CUDAKernelCallExpr::Create(Context, Fn,
cast<CallExpr>(ExecConfig), ArgExprs,
Context.DependentTy, VK_PRValue,
RParenLoc, CurFPFeatureOverrides());
} else {
tryImplicitlyCaptureThisIfImplicitMemberFunctionAccessWithDependentArgs(
*this, dyn_cast<UnresolvedMemberExpr>(Fn->IgnoreParens()),
Fn->getBeginLoc());
return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy,
VK_PRValue, RParenLoc, CurFPFeatureOverrides());
}
}
// Determine whether this is a call to an object (C++ [over.call.object]).
if (Fn->getType()->isRecordType())
return BuildCallToObjectOfClassType(Scope, Fn, LParenLoc, ArgExprs,
RParenLoc);
if (Fn->getType() == Context.UnknownAnyTy) {
ExprResult result = rebuildUnknownAnyFunction(*this, Fn);
if (result.isInvalid()) return ExprError();
Fn = result.get();
}
if (Fn->getType() == Context.BoundMemberTy) {
return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs,
RParenLoc, ExecConfig, IsExecConfig,
AllowRecovery);
}
}
// Check for overloaded calls. This can happen even in C due to extensions.
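// e.g. C functions declared with `__attribute__((overloadable))`
// participate in overload resolution, giving the callee OverloadTy here.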
if (Fn->getType() == Context.OverloadTy) {
OverloadExpr::FindResult find = OverloadExpr::find(Fn);
// We aren't supposed to apply this logic if there's an '&' involved.
if (!find.HasFormOfMemberPointer || find.IsAddressOfOperandWithParen) {
if (Expr::hasAnyTypeDependentArguments(ArgExprs))
return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy,
VK_PRValue, RParenLoc, CurFPFeatureOverrides());
OverloadExpr *ovl = find.Expression;
if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(ovl))
return BuildOverloadedCallExpr(
Scope, Fn, ULE, LParenLoc, ArgExprs, RParenLoc, ExecConfig,
/*AllowTypoCorrection=*/true, find.IsAddressOfOperand);
return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs,
RParenLoc, ExecConfig, IsExecConfig,
AllowRecovery);
}
}
// If we're directly calling a function, get the appropriate declaration.
if (Fn->getType() == Context.UnknownAnyTy) {
ExprResult result = rebuildUnknownAnyFunction(*this, Fn);
if (result.isInvalid()) return ExprError();
Fn = result.get();
}
Expr *NakedFn = Fn->IgnoreParens();
bool CallingNDeclIndirectly = false;
NamedDecl *NDecl = nullptr;
if (UnaryOperator *UnOp = dyn_cast<UnaryOperator>(NakedFn)) {
if (UnOp->getOpcode() == UO_AddrOf) {
CallingNDeclIndirectly = true;
NakedFn = UnOp->getSubExpr()->IgnoreParens();
}
}
if (auto *DRE = dyn_cast<DeclRefExpr>(NakedFn)) {
NDecl = DRE->getDecl();
FunctionDecl *FDecl = dyn_cast<FunctionDecl>(NDecl);
if (FDecl && FDecl->getBuiltinID()) {
// Rewrite the function decl for this builtin by replacing parameters
// with no explicit address space with the address space of the arguments
// in ArgExprs.
if ((FDecl =
rewriteBuiltinFunctionDecl(this, Context, FDecl, ArgExprs))) {
NDecl = FDecl;
Fn = DeclRefExpr::Create(
Context, FDecl->getQualifierLoc(), SourceLocation(), FDecl, false,
SourceLocation(), FDecl->getType(), Fn->getValueKind(), FDecl,
nullptr, DRE->isNonOdrUse());
}
}
} else if (auto *ME = dyn_cast<MemberExpr>(NakedFn))
NDecl = ME->getMemberDecl();
if (FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(NDecl)) {
if (CallingNDeclIndirectly && !checkAddressOfFunctionIsAvailable(
FD, /*Complain=*/true, Fn->getBeginLoc()))
return ExprError();
checkDirectCallValidity(*this, Fn, FD, ArgExprs);
// If this expression is a call to a builtin function in HIP device
// compilation, allow a pointer-type argument to default address space to be
// passed as a pointer-type parameter to a non-default address space.
// If Arg is declared in the default address space and Param is declared
// in a non-default address space, perform an implicit address space cast to
// the parameter type.
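// For illustration: an `int *` argument in the default address space
// passed to a builtin parameter declared as a pointer to a non-default
// address space receives a CK_AddressSpaceConversion cast below.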
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice && FD &&
FD->getBuiltinID()) {
for (unsigned Idx = 0; Idx < ArgExprs.size() && Idx < FD->param_size();
++Idx) {
ParmVarDecl *Param = FD->getParamDecl(Idx);
if (!ArgExprs[Idx] || !Param || !Param->getType()->isPointerType() ||
!ArgExprs[Idx]->getType()->isPointerType())
continue;
auto ParamAS = Param->getType()->getPointeeType().getAddressSpace();
auto ArgTy = ArgExprs[Idx]->getType();
auto ArgPtTy = ArgTy->getPointeeType();
auto ArgAS = ArgPtTy.getAddressSpace();
// Add an address space cast if the target address spaces differ.
// Pointer parameters in the generic AS don't need special handling; we
// allow implicit conversion from the generic AS, or from a specific AS
// whose target AS matches that of the parameter.
bool NeedImplicitASC =
ParamAS != LangAS::Default &&
(ArgAS == LangAS::Default ||
getASTContext().getTargetAddressSpace(ArgAS) ==
getASTContext().getTargetAddressSpace(ParamAS));
if (!NeedImplicitASC)
continue;
// First, ensure that the Arg is an RValue.
if (ArgExprs[Idx]->isGLValue()) {
ArgExprs[Idx] = ImplicitCastExpr::Create(
Context, ArgExprs[Idx]->getType(), CK_NoOp, ArgExprs[Idx],
nullptr, VK_PRValue, FPOptionsOverride());
}
// Construct a new arg type with address space of Param
Qualifiers ArgPtQuals = ArgPtTy.getQualifiers();
ArgPtQuals.setAddressSpace(ParamAS);
auto NewArgPtTy =
Context.getQualifiedType(ArgPtTy.getUnqualifiedType(), ArgPtQuals);
auto NewArgTy =
Context.getQualifiedType(Context.getPointerType(NewArgPtTy),
ArgTy.getQualifiers());
// Finally perform an implicit address space cast
ArgExprs[Idx] = ImpCastExprToType(ArgExprs[Idx], NewArgTy,
CK_AddressSpaceConversion)
.get();
}
}
}
if (Context.isDependenceAllowed() &&
(Fn->isTypeDependent() || Expr::hasAnyTypeDependentArguments(ArgExprs))) {
assert(!getLangOpts().CPlusPlus);
assert((Fn->containsErrors() ||
llvm::any_of(ArgExprs,
[](clang::Expr *E) { return E->containsErrors(); })) &&
"should only occur in error-recovery path.");
return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy,
VK_PRValue, RParenLoc, CurFPFeatureOverrides());
}
return BuildResolvedCallExpr(Fn, NDecl, LParenLoc, ArgExprs, RParenLoc,
ExecConfig, IsExecConfig);
}
Expr *Sema::BuildBuiltinCallExpr(SourceLocation Loc, Builtin::ID Id,
MultiExprArg CallArgs) {
StringRef Name = Context.BuiltinInfo.getName(Id);
LookupResult R(*this, &Context.Idents.get(Name), Loc,
Sema::LookupOrdinaryName);
LookupName(R, TUScope, /*AllowBuiltinCreation=*/true);
auto *BuiltInDecl = R.getAsSingle<FunctionDecl>();
assert(BuiltInDecl && "failed to find builtin declaration");
ExprResult DeclRef =
BuildDeclRefExpr(BuiltInDecl, BuiltInDecl->getType(), VK_LValue, Loc);
assert(DeclRef.isUsable() && "Builtin reference cannot fail");
ExprResult Call =
BuildCallExpr(/*Scope=*/nullptr, DeclRef.get(), Loc, CallArgs, Loc);
assert(!Call.isInvalid() && "Call to builtin cannot fail!");
return Call.get();
}
ExprResult Sema::ActOnAsTypeExpr(Expr *E, ParsedType ParsedDestTy,
SourceLocation BuiltinLoc,
SourceLocation RParenLoc) {
QualType DstTy = GetTypeFromParser(ParsedDestTy);
return BuildAsTypeExpr(E, DstTy, BuiltinLoc, RParenLoc);
}
ExprResult Sema::BuildAsTypeExpr(Expr *E, QualType DestTy,
SourceLocation BuiltinLoc,
SourceLocation RParenLoc) {
ExprValueKind VK = VK_PRValue;
ExprObjectKind OK = OK_Ordinary;
QualType SrcTy = E->getType();
if (!SrcTy->isDependentType() &&
Context.getTypeSize(DestTy) != Context.getTypeSize(SrcTy))
return ExprError(
Diag(BuiltinLoc, diag::err_invalid_astype_of_different_size)
<< DestTy << SrcTy << E->getSourceRange());
return new (Context) AsTypeExpr(E, DestTy, VK, OK, BuiltinLoc, RParenLoc);
}
ExprResult Sema::ActOnConvertVectorExpr(Expr *E, ParsedType ParsedDestTy,
SourceLocation BuiltinLoc,
SourceLocation RParenLoc) {
TypeSourceInfo *TInfo;
GetTypeFromParser(ParsedDestTy, &TInfo);
return ConvertVectorExpr(E, TInfo, BuiltinLoc, RParenLoc);
}
ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
SourceLocation LParenLoc,
ArrayRef<Expr *> Args,
SourceLocation RParenLoc, Expr *Config,
bool IsExecConfig, ADLCallKind UsesADL) {
FunctionDecl *FDecl = dyn_cast_or_null<FunctionDecl>(NDecl);
unsigned BuiltinID = (FDecl ? FDecl->getBuiltinID() : 0);
// Functions with 'interrupt' attribute cannot be called directly.
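// e.g. a direct call to a handler marked `__attribute__((interrupt))` is
// rejected here on X86 and ARM targets.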
if (FDecl) {
if (FDecl->hasAttr<AnyX86InterruptAttr>()) {
Diag(Fn->getExprLoc(), diag::err_anyx86_interrupt_called);
return ExprError();
}
if (FDecl->hasAttr<ARMInterruptAttr>()) {
Diag(Fn->getExprLoc(), diag::err_arm_interrupt_called);
return ExprError();
}
}
// X86 interrupt handlers may only call routines with attribute
// no_caller_saved_registers since there is no efficient way to
// save and restore the non-GPR state.
if (auto *Caller = getCurFunctionDecl()) {
if (Caller->hasAttr<AnyX86InterruptAttr>() ||
Caller->hasAttr<AnyX86NoCallerSavedRegistersAttr>()) {
const TargetInfo &TI = Context.getTargetInfo();
bool HasNonGPRRegisters =
TI.hasFeature("sse") || TI.hasFeature("x87") || TI.hasFeature("mmx");
if (HasNonGPRRegisters &&
(!FDecl || !FDecl->hasAttr<AnyX86NoCallerSavedRegistersAttr>())) {
Diag(Fn->getExprLoc(), diag::warn_anyx86_excessive_regsave)
<< (Caller->hasAttr<AnyX86InterruptAttr>() ? 0 : 1);
if (FDecl)
Diag(FDecl->getLocation(), diag::note_callee_decl) << FDecl;
}
}
}
// Promote the function operand.
// We special-case function promotion here because we only allow promoting
// builtin functions to function pointers in the callee of a call.
ExprResult Result;
QualType ResultTy;
if (BuiltinID &&
Fn->getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)) {
// Extract the return type from the (builtin) function pointer type.
// FIXME Several builtins still have setType in
// Sema::CheckBuiltinFunctionCall. One should review their definitions in
// Builtins.td to ensure they are correct before removing setType calls.
QualType FnPtrTy = Context.getPointerType(FDecl->getType());
Result = ImpCastExprToType(Fn, FnPtrTy, CK_BuiltinFnToFnPtr).get();
ResultTy = FDecl->getCallResultType();
} else {
Result = CallExprUnaryConversions(Fn);
ResultTy = Context.BoolTy;
}
if (Result.isInvalid())
return ExprError();
Fn = Result.get();
// Check for a valid function type, but only if it is not a builtin which
// requires custom type checking. These will be handled by
// CheckBuiltinFunctionCall below just after creation of the call expression.
const FunctionType *FuncT = nullptr;
if (!BuiltinID || !Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) {
retry:
if (const PointerType *PT = Fn->getType()->getAs<PointerType>()) {
// C99 6.5.2.2p1 - "The expression that denotes the called function shall
// have type pointer to function".
FuncT = PT->getPointeeType()->getAs<FunctionType>();
if (!FuncT)
return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function)
<< Fn->getType() << Fn->getSourceRange());
} else if (const BlockPointerType *BPT =
Fn->getType()->getAs<BlockPointerType>()) {
FuncT = BPT->getPointeeType()->castAs<FunctionType>();
} else {
// Handle calls to expressions of unknown-any type.
if (Fn->getType() == Context.UnknownAnyTy) {
ExprResult rewrite = rebuildUnknownAnyFunction(*this, Fn);
if (rewrite.isInvalid())
return ExprError();
Fn = rewrite.get();
goto retry;
}
return ExprError(Diag(LParenLoc, diag::err_typecheck_call_not_function)
<< Fn->getType() << Fn->getSourceRange());
}
}
// Get the number of parameters in the function prototype, if any.
// We will allocate space for max(Args.size(), NumParams) arguments
// in the call expression.
const auto *Proto = dyn_cast_or_null<FunctionProtoType>(FuncT);
unsigned NumParams = Proto ? Proto->getNumParams() : 0;
CallExpr *TheCall;
if (Config) {
assert(UsesADL == ADLCallKind::NotADL &&
"CUDAKernelCallExpr should not use ADL");
TheCall = CUDAKernelCallExpr::Create(Context, Fn, cast<CallExpr>(Config),
Args, ResultTy, VK_PRValue, RParenLoc,
CurFPFeatureOverrides(), NumParams);
} else {
TheCall =
CallExpr::Create(Context, Fn, Args, ResultTy, VK_PRValue, RParenLoc,
CurFPFeatureOverrides(), NumParams, UsesADL);
}
if (!Context.isDependenceAllowed()) {
// Forget about the nulled arguments, since typo correction
// does not handle them well.
TheCall->shrinkNumArgs(Args.size());
// C cannot always handle TypoExpr nodes in builtin calls and direct
// function calls, as their argument checking doesn't necessarily handle
// dependent types properly, so make sure any TypoExprs have been
// dealt with.
ExprResult Result = CorrectDelayedTyposInExpr(TheCall);
if (!Result.isUsable()) return ExprError();
CallExpr *TheOldCall = TheCall;
TheCall = dyn_cast<CallExpr>(Result.get());
bool CorrectedTypos = TheCall != TheOldCall;
if (!TheCall) return Result;
Args = llvm::ArrayRef(TheCall->getArgs(), TheCall->getNumArgs());
// A new call expression node was created if some typos were corrected.
// However it may not have been constructed with enough storage. In this
// case, rebuild the node with enough storage. The waste of space is
// immaterial since this only happens when some typos were corrected.
if (CorrectedTypos && Args.size() < NumParams) {
if (Config)
TheCall = CUDAKernelCallExpr::Create(
Context, Fn, cast<CallExpr>(Config), Args, ResultTy, VK_PRValue,
RParenLoc, CurFPFeatureOverrides(), NumParams);
else
TheCall =
CallExpr::Create(Context, Fn, Args, ResultTy, VK_PRValue, RParenLoc,
CurFPFeatureOverrides(), NumParams, UsesADL);
}
// We can now handle the nulled arguments for the default arguments.
TheCall->setNumArgsUnsafe(std::max<unsigned>(Args.size(), NumParams));
}
// Bail out early if calling a builtin with custom type checking.
if (BuiltinID && Context.BuiltinInfo.hasCustomTypechecking(BuiltinID)) {
ExprResult E = CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall);
if (!E.isInvalid() && Context.BuiltinInfo.isImmediate(BuiltinID))
E = CheckForImmediateInvocation(E, FDecl);
return E;
}
if (getLangOpts().CUDA) {
if (Config) {
// CUDA: Kernel calls must be to global functions
if (FDecl && !FDecl->hasAttr<CUDAGlobalAttr>())
return ExprError(Diag(LParenLoc, diag::err_kern_call_not_global_function)
<< FDecl << Fn->getSourceRange());
// CUDA: Kernel function must have 'void' return type
if (!FuncT->getReturnType()->isVoidType() &&
!FuncT->getReturnType()->getAs<AutoType>() &&
!FuncT->getReturnType()->isInstantiationDependentType())
return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return)
<< Fn->getType() << Fn->getSourceRange());
} else {
// CUDA: Calls to global functions must be configured
if (FDecl && FDecl->hasAttr<CUDAGlobalAttr>())
return ExprError(Diag(LParenLoc, diag::err_global_call_not_config)
<< FDecl << Fn->getSourceRange());
}
}
// Check for a valid return type
if (CheckCallReturnType(FuncT->getReturnType(), Fn->getBeginLoc(), TheCall,
FDecl))
return ExprError();
// We know the result type of the call, set it.
TheCall->setType(FuncT->getCallResultType(Context));
TheCall->setValueKind(Expr::getValueKindForType(FuncT->getReturnType()));
// WebAssembly tables can't be used as arguments.
if (Context.getTargetInfo().getTriple().isWasm()) {
for (const Expr *Arg : Args) {
if (Arg && Arg->getType()->isWebAssemblyTableType()) {
return ExprError(Diag(Arg->getExprLoc(),
diag::err_wasm_table_as_function_parameter));
}
}
}
if (Proto) {
if (ConvertArgumentsForCall(TheCall, Fn, FDecl, Proto, Args, RParenLoc,
IsExecConfig))
return ExprError();
} else {
assert(isa<FunctionNoProtoType>(FuncT) && "Unknown FunctionType!");
if (FDecl) {
// Check if we have too few/too many arguments, based
// on our knowledge of the function definition.
const FunctionDecl *Def = nullptr;
if (FDecl->hasBody(Def) && Args.size() != Def->param_size()) {
Proto = Def->getType()->getAs<FunctionProtoType>();
if (!Proto || !(Proto->isVariadic() && Args.size() >= Def->param_size()))
Diag(RParenLoc, diag::warn_call_wrong_number_of_arguments)
<< (Args.size() > Def->param_size()) << FDecl << Fn->getSourceRange();
}
// If the function we're calling doesn't have a prototype, but we have
// a function prototype from a prior declaration, use that prototype.
if (!FDecl->hasPrototype())
Proto = FDecl->getType()->getAs<FunctionProtoType>();
}
// If we still haven't found a prototype to use but there are arguments to
// the call, diagnose this as calling a function without a prototype.
// However, if we found a function declaration, check to see if
// -Wdeprecated-non-prototype was disabled where the function was declared.
// If so, we will silence the diagnostic here on the assumption that this
// interface is intentional and the user knows what they're doing. We will
// also silence the diagnostic if there is a function declaration but it
// was implicitly defined (the user already gets diagnostics about the
// creation of the implicit function declaration, so the additional warning
// is not helpful).
if (!Proto && !Args.empty() &&
(!FDecl || (!FDecl->isImplicit() &&
!Diags.isIgnored(diag::warn_strict_uses_without_prototype,
FDecl->getLocation()))))
Diag(LParenLoc, diag::warn_strict_uses_without_prototype)
<< (FDecl != nullptr) << FDecl;
// Promote the arguments (C99 6.5.2.2p6).
for (unsigned i = 0, e = Args.size(); i != e; i++) {
Expr *Arg = Args[i];
if (Proto && i < Proto->getNumParams()) {
InitializedEntity Entity = InitializedEntity::InitializeParameter(
Context, Proto->getParamType(i), Proto->isParamConsumed(i));
ExprResult ArgE =
PerformCopyInitialization(Entity, SourceLocation(), Arg);
if (ArgE.isInvalid())
return true;
Arg = ArgE.getAs<Expr>();
} else {
ExprResult ArgE = DefaultArgumentPromotion(Arg);
if (ArgE.isInvalid())
return true;
Arg = ArgE.getAs<Expr>();
}
if (RequireCompleteType(Arg->getBeginLoc(), Arg->getType(),
diag::err_call_incomplete_argument, Arg))
return ExprError();
TheCall->setArg(i, Arg);
}
TheCall->computeDependence();
}
if (CXXMethodDecl *Method = dyn_cast_or_null<CXXMethodDecl>(FDecl))
if (!isa<RequiresExprBodyDecl>(CurContext) &&
Method->isImplicitObjectMemberFunction())
return ExprError(Diag(LParenLoc, diag::err_member_call_without_object)
<< Fn->getSourceRange() << 0);
// Check for sentinels
if (NDecl)
DiagnoseSentinelCalls(NDecl, LParenLoc, Args);
// Warn for unions passing across security boundary (CMSE).
if (FuncT != nullptr && FuncT->getCmseNSCallAttr()) {
for (unsigned i = 0, e = Args.size(); i != e; i++) {
if (const auto *RT =
dyn_cast<RecordType>(Args[i]->getType().getCanonicalType())) {
if (RT->getDecl()->isOrContainsUnion())
Diag(Args[i]->getBeginLoc(), diag::warn_cmse_nonsecure_union)
<< 0 << i;
}
}
}
// Do special checking on direct calls to functions.
if (FDecl) {
if (CheckFunctionCall(FDecl, TheCall, Proto))
return ExprError();
checkFortifiedBuiltinMemoryFunction(FDecl, TheCall);
if (BuiltinID)
return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall);
} else if (NDecl) {
if (CheckPointerCall(NDecl, TheCall, Proto))
return ExprError();
} else {
if (CheckOtherCall(TheCall, Proto))
return ExprError();
}
return CheckForImmediateInvocation(MaybeBindToTemporary(TheCall), FDecl);
}
ExprResult
Sema::ActOnCompoundLiteral(SourceLocation LParenLoc, ParsedType Ty,
SourceLocation RParenLoc, Expr *InitExpr) {
assert(Ty && "ActOnCompoundLiteral(): missing type");
assert(InitExpr && "ActOnCompoundLiteral(): missing expression");
TypeSourceInfo *TInfo;
QualType literalType = GetTypeFromParser(Ty, &TInfo);
if (!TInfo)
TInfo = Context.getTrivialTypeSourceInfo(literalType);
return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, InitExpr);
}
ExprResult
Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo,
SourceLocation RParenLoc, Expr *LiteralExpr) {
QualType literalType = TInfo->getType();
if (literalType->isArrayType()) {
if (RequireCompleteSizedType(
LParenLoc, Context.getBaseElementType(literalType),
diag::err_array_incomplete_or_sizeless_type,
SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd())))
return ExprError();
if (literalType->isVariableArrayType()) {
// C23 6.7.10p4: An entity of variable length array type shall not be
// initialized except by an empty initializer.
//
// The C extension warnings are issued from ParseBraceInitializer() and
// do not need to be issued here. However, we continue to issue an error
// in the case where there are initializers or we are compiling C++.
// use of VLAs in C++, but it's not clear we want to allow {} to zero
// init a VLA in C++ in all cases (such as with non-trivial constructors).
// FIXME: should we allow this construct in C++ when it makes sense to do
// so?
//
// But: C99-C23 6.5.2.5 Compound literals constraint 1: The type name
// shall specify an object type or an array of unknown size, but not a
// variable length array type. This seems odd, as it allows 'int a[size] =
// {}', but forbids 'int *a = (int[size]){}'. As this is what the standard
// says, this is what's implemented here for C (except for the extension
// that permits constant-foldable size arrays).
auto diagID = LangOpts.CPlusPlus
? diag::err_variable_object_no_init
: diag::err_compound_literal_with_vla_type;
if (!tryToFixVariablyModifiedVarType(TInfo, literalType, LParenLoc,
diagID))
return ExprError();
}
} else if (!literalType->isDependentType() &&
RequireCompleteType(LParenLoc, literalType,
diag::err_typecheck_decl_incomplete_type,
SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd())))
return ExprError();
InitializedEntity Entity
= InitializedEntity::InitializeCompoundLiteralInit(TInfo);
InitializationKind Kind
= InitializationKind::CreateCStyleCast(LParenLoc,
SourceRange(LParenLoc, RParenLoc),
/*InitList=*/true);
InitializationSequence InitSeq(*this, Entity, Kind, LiteralExpr);
ExprResult Result = InitSeq.Perform(*this, Entity, Kind, LiteralExpr,
&literalType);
if (Result.isInvalid())
return ExprError();
LiteralExpr = Result.get();
bool isFileScope = !CurContext->isFunctionOrMethod();
// In C, compound literals are l-values for some reason.
// For GCC compatibility, in C++, file-scope array compound literals with
// constant initializers are also l-values, and compound literals are
// otherwise prvalues.
//
// (GCC also treats C++ list-initialized file-scope array prvalues with
// constant initializers as l-values, but that's non-conforming, so we don't
// follow it there.)
//
// FIXME: It would be better to handle the lvalue cases as materializing and
// lifetime-extending a temporary object, but our materialized temporaries
// representation only supports lifetime extension from a variable, not "out
// of thin air".
// FIXME: For C++, we might want to instead lifetime-extend only if a pointer
// is bound to the result of applying array-to-pointer decay to the compound
// literal.
// FIXME: GCC supports compound literals of reference type, which should
// obviously have a value kind derived from the kind of reference involved.
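// For example: in C, '(int){1} = 2;' is accepted because the compound
// literal is an lvalue; in C++, '(int){1}' is a prvalue, but a file-scope
// '(int[]){1, 2}' with a constant initializer is treated as an lvalue
// for GCC compatibility.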
ExprValueKind VK =
(getLangOpts().CPlusPlus && !(isFileScope && literalType->isArrayType()))
? VK_PRValue
: VK_LValue;
if (isFileScope)
if (auto ILE = dyn_cast<InitListExpr>(LiteralExpr))
for (unsigned i = 0, j = ILE->getNumInits(); i != j; i++) {
Expr *Init = ILE->getInit(i);
ILE->setInit(i, ConstantExpr::Create(Context, Init));
}
auto *E = new (Context) CompoundLiteralExpr(LParenLoc, TInfo, literalType,
VK, LiteralExpr, isFileScope);
if (isFileScope) {
if (!LiteralExpr->isTypeDependent() &&
!LiteralExpr->isValueDependent() &&
!literalType->isDependentType()) // C99 6.5.2.5p3
if (CheckForConstantInitializer(LiteralExpr))
return ExprError();
} else if (literalType.getAddressSpace() != LangAS::opencl_private &&
literalType.getAddressSpace() != LangAS::Default) {
// Embedded-C extensions to C99 6.5.2.5:
// "If the compound literal occurs inside the body of a function, the
// type name shall not be qualified by an address-space qualifier."
Diag(LParenLoc, diag::err_compound_literal_with_address_space)
<< SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd());
return ExprError();
}
if (!isFileScope && !getLangOpts().CPlusPlus) {
// Compound literals that have automatic storage duration are destroyed at
// the end of the scope in C; in C++, they're just temporaries.
// Emit diagnostics if it is or contains a C union type that is non-trivial
// to destruct.
if (E->getType().hasNonTrivialToPrimitiveDestructCUnion())
checkNonTrivialCUnion(E->getType(), E->getExprLoc(),
NTCUC_CompoundLiteral, NTCUK_Destruct);
// Diagnose jumps that enter or exit the lifetime of the compound literal.
if (literalType.isDestructedType()) {
Cleanup.setExprNeedsCleanups(true);
ExprCleanupObjects.push_back(E);
getCurFunction()->setHasBranchProtectedScope();
}
}
if (E->getType().hasNonTrivialToPrimitiveDefaultInitializeCUnion() ||
E->getType().hasNonTrivialToPrimitiveCopyCUnion())
checkNonTrivialCUnionInInitializer(E->getInitializer(),
E->getInitializer()->getExprLoc());
return MaybeBindToTemporary(E);
}
ExprResult
Sema::ActOnInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
SourceLocation RBraceLoc) {
// Only produce each kind of designated initialization diagnostic once.
SourceLocation FirstDesignator;
bool DiagnosedArrayDesignator = false;
bool DiagnosedNestedDesignator = false;
bool DiagnosedMixedDesignator = false;
// Check that any designated initializers are syntactically valid in the
// current language mode.
for (unsigned I = 0, E = InitArgList.size(); I != E; ++I) {
if (auto *DIE = dyn_cast<DesignatedInitExpr>(InitArgList[I])) {
if (FirstDesignator.isInvalid())
FirstDesignator = DIE->getBeginLoc();
if (!getLangOpts().CPlusPlus)
break;
if (!DiagnosedNestedDesignator && DIE->size() > 1) {
DiagnosedNestedDesignator = true;
Diag(DIE->getBeginLoc(), diag::ext_designated_init_nested)
<< DIE->getDesignatorsSourceRange();
}
for (auto &Desig : DIE->designators()) {
if (!Desig.isFieldDesignator() && !DiagnosedArrayDesignator) {
DiagnosedArrayDesignator = true;
Diag(Desig.getBeginLoc(), diag::ext_designated_init_array)
<< Desig.getSourceRange();
}
}
if (!DiagnosedMixedDesignator &&
!isa<DesignatedInitExpr>(InitArgList[0])) {
DiagnosedMixedDesignator = true;
Diag(DIE->getBeginLoc(), diag::ext_designated_init_mixed)
<< DIE->getSourceRange();
Diag(InitArgList[0]->getBeginLoc(), diag::note_designated_init_mixed)
<< InitArgList[0]->getSourceRange();
}
} else if (getLangOpts().CPlusPlus && !DiagnosedMixedDesignator &&
isa<DesignatedInitExpr>(InitArgList[0])) {
DiagnosedMixedDesignator = true;
auto *DIE = cast<DesignatedInitExpr>(InitArgList[0]);
Diag(DIE->getBeginLoc(), diag::ext_designated_init_mixed)
<< DIE->getSourceRange();
Diag(InitArgList[I]->getBeginLoc(), diag::note_designated_init_mixed)
<< InitArgList[I]->getSourceRange();
}
}
if (FirstDesignator.isValid()) {
// Only diagnose designated initialization as a C++20 extension if we didn't
// already diagnose use of (non-C++20) C99 designator syntax.
if (getLangOpts().CPlusPlus && !DiagnosedArrayDesignator &&
!DiagnosedNestedDesignator && !DiagnosedMixedDesignator) {
Diag(FirstDesignator, getLangOpts().CPlusPlus20
? diag::warn_cxx17_compat_designated_init
: diag::ext_cxx_designated_init);
} else if (!getLangOpts().CPlusPlus && !getLangOpts().C99) {
Diag(FirstDesignator, diag::ext_designated_init);
}
}
return BuildInitList(LBraceLoc, InitArgList, RBraceLoc);
}
ExprResult
Sema::BuildInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList,
SourceLocation RBraceLoc) {
// Semantic analysis for initializers is done by ActOnDeclarator() and
// CheckInitializer() - it requires knowledge of the object being initialized.
// Immediately handle non-overload placeholders. Overloads can be
// resolved contextually, but everything else here can't.
for (unsigned I = 0, E = InitArgList.size(); I != E; ++I) {
if (InitArgList[I]->getType()->isNonOverloadPlaceholderType()) {
ExprResult result = CheckPlaceholderExpr(InitArgList[I]);
// Ignore failures; dropping the entire initializer list because
// of one failure would be terrible for indexing/etc.
if (result.isInvalid()) continue;
InitArgList[I] = result.get();
}
}
InitListExpr *E =
new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc);
E->setType(Context.VoidTy); // FIXME: just a placeholder for now.
return E;
}
void Sema::maybeExtendBlockObject(ExprResult &E) {
assert(E.get()->getType()->isBlockPointerType());
assert(E.get()->isPRValue());
// Only do this in an r-value context.
if (!getLangOpts().ObjCAutoRefCount) return;
E = ImplicitCastExpr::Create(
Context, E.get()->getType(), CK_ARCExtendBlockObject, E.get(),
/*base path*/ nullptr, VK_PRValue, FPOptionsOverride());
Cleanup.setExprNeedsCleanups(true);
}
CastKind Sema::PrepareScalarCast(ExprResult &Src, QualType DestTy) {
// Both Src and Dest are scalar types, i.e. arithmetic or pointer.
// Also, callers should have filtered out the invalid cases with
// pointers. Everything else should be possible.
QualType SrcTy = Src.get()->getType();
if (Context.hasSameUnqualifiedType(SrcTy, DestTy))
return CK_NoOp;
switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) {
case Type::STK_MemberPointer:
llvm_unreachable("member pointer type in C");
case Type::STK_CPointer:
case Type::STK_BlockPointer:
case Type::STK_ObjCObjectPointer:
switch (DestTy->getScalarTypeKind()) {
case Type::STK_CPointer: {
LangAS SrcAS = SrcTy->getPointeeType().getAddressSpace();
LangAS DestAS = DestTy->getPointeeType().getAddressSpace();
if (SrcAS != DestAS)
return CK_AddressSpaceConversion;
if (Context.hasCvrSimilarType(SrcTy, DestTy))
return CK_NoOp;
return CK_BitCast;
}
case Type::STK_BlockPointer:
return (SrcKind == Type::STK_BlockPointer
? CK_BitCast : CK_AnyPointerToBlockPointerCast);
case Type::STK_ObjCObjectPointer:
if (SrcKind == Type::STK_ObjCObjectPointer)
return CK_BitCast;
if (SrcKind == Type::STK_CPointer)
return CK_CPointerToObjCPointerCast;
maybeExtendBlockObject(Src);
return CK_BlockPointerToObjCPointerCast;
case Type::STK_Bool:
return CK_PointerToBoolean;
case Type::STK_Integral:
return CK_PointerToIntegral;
case Type::STK_Floating:
case Type::STK_FloatingComplex:
case Type::STK_IntegralComplex:
case Type::STK_MemberPointer:
case Type::STK_FixedPoint:
llvm_unreachable("illegal cast from pointer");
}
llvm_unreachable("Should have returned before this");
case Type::STK_FixedPoint:
switch (DestTy->getScalarTypeKind()) {
case Type::STK_FixedPoint:
return CK_FixedPointCast;
case Type::STK_Bool:
return CK_FixedPointToBoolean;
case Type::STK_Integral:
return CK_FixedPointToIntegral;
case Type::STK_Floating:
return CK_FixedPointToFloating;
case Type::STK_IntegralComplex:
case Type::STK_FloatingComplex:
Diag(Src.get()->getExprLoc(),
diag::err_unimplemented_conversion_with_fixed_point_type)
<< DestTy;
return CK_IntegralCast;
case Type::STK_CPointer:
case Type::STK_ObjCObjectPointer:
case Type::STK_BlockPointer:
case Type::STK_MemberPointer:
llvm_unreachable("illegal cast to pointer type");
}
llvm_unreachable("Should have returned before this");
case Type::STK_Bool: // casting from bool is like casting from an integer
case Type::STK_Integral:
switch (DestTy->getScalarTypeKind()) {
case Type::STK_CPointer:
case Type::STK_ObjCObjectPointer:
case Type::STK_BlockPointer:
if (Src.get()->isNullPointerConstant(Context,
Expr::NPC_ValueDependentIsNull))
return CK_NullToPointer;
return CK_IntegralToPointer;
case Type::STK_Bool:
return CK_IntegralToBoolean;
case Type::STK_Integral:
return CK_IntegralCast;
case Type::STK_Floating:
return CK_IntegralToFloating;
case Type::STK_IntegralComplex:
Src = ImpCastExprToType(Src.get(),
DestTy->castAs<ComplexType>()->getElementType(),
CK_IntegralCast);
return CK_IntegralRealToComplex;
case Type::STK_FloatingComplex:
Src = ImpCastExprToType(Src.get(),
DestTy->castAs<ComplexType>()->getElementType(),
CK_IntegralToFloating);
return CK_FloatingRealToComplex;
case Type::STK_MemberPointer:
llvm_unreachable("member pointer type in C");
case Type::STK_FixedPoint:
return CK_IntegralToFixedPoint;
}
llvm_unreachable("Should have returned before this");
case Type::STK_Floating:
switch (DestTy->getScalarTypeKind()) {
case Type::STK_Floating:
return CK_FloatingCast;
case Type::STK_Bool:
return CK_FloatingToBoolean;
case Type::STK_Integral:
return CK_FloatingToIntegral;
case Type::STK_FloatingComplex:
Src = ImpCastExprToType(Src.get(),
DestTy->castAs<ComplexType>()->getElementType(),
CK_FloatingCast);
return CK_FloatingRealToComplex;
case Type::STK_IntegralComplex:
Src = ImpCastExprToType(Src.get(),
DestTy->castAs<ComplexType>()->getElementType(),
CK_FloatingToIntegral);
return CK_IntegralRealToComplex;
case Type::STK_CPointer:
case Type::STK_ObjCObjectPointer:
case Type::STK_BlockPointer:
llvm_unreachable("valid float->pointer cast?");
case Type::STK_MemberPointer:
llvm_unreachable("member pointer type in C");
case Type::STK_FixedPoint:
return CK_FloatingToFixedPoint;
}
llvm_unreachable("Should have returned before this");
case Type::STK_FloatingComplex:
switch (DestTy->getScalarTypeKind()) {
case Type::STK_FloatingComplex:
return CK_FloatingComplexCast;
case Type::STK_IntegralComplex:
return CK_FloatingComplexToIntegralComplex;
case Type::STK_Floating: {
QualType ET = SrcTy->castAs<ComplexType>()->getElementType();
if (Context.hasSameType(ET, DestTy))
return CK_FloatingComplexToReal;
Src = ImpCastExprToType(Src.get(), ET, CK_FloatingComplexToReal);
return CK_FloatingCast;
}
case Type::STK_Bool:
return CK_FloatingComplexToBoolean;
case Type::STK_Integral:
Src = ImpCastExprToType(Src.get(),
SrcTy->castAs<ComplexType>()->getElementType(),
CK_FloatingComplexToReal);
return CK_FloatingToIntegral;
case Type::STK_CPointer:
case Type::STK_ObjCObjectPointer:
case Type::STK_BlockPointer:
llvm_unreachable("valid complex float->pointer cast?");
case Type::STK_MemberPointer:
llvm_unreachable("member pointer type in C");
case Type::STK_FixedPoint:
Diag(Src.get()->getExprLoc(),
diag::err_unimplemented_conversion_with_fixed_point_type)
<< SrcTy;
return CK_IntegralCast;
}
llvm_unreachable("Should have returned before this");
case Type::STK_IntegralComplex:
switch (DestTy->getScalarTypeKind()) {
case Type::STK_FloatingComplex:
return CK_IntegralComplexToFloatingComplex;
case Type::STK_IntegralComplex:
return CK_IntegralComplexCast;
case Type::STK_Integral: {
QualType ET = SrcTy->castAs<ComplexType>()->getElementType();
if (Context.hasSameType(ET, DestTy))
return CK_IntegralComplexToReal;
Src = ImpCastExprToType(Src.get(), ET, CK_IntegralComplexToReal);
return CK_IntegralCast;
}
case Type::STK_Bool:
return CK_IntegralComplexToBoolean;
case Type::STK_Floating:
Src = ImpCastExprToType(Src.get(),
SrcTy->castAs<ComplexType>()->getElementType(),
CK_IntegralComplexToReal);
return CK_IntegralToFloating;
case Type::STK_CPointer:
case Type::STK_ObjCObjectPointer:
case Type::STK_BlockPointer:
llvm_unreachable("valid complex int->pointer cast?");
case Type::STK_MemberPointer:
llvm_unreachable("member pointer type in C");
case Type::STK_FixedPoint:
Diag(Src.get()->getExprLoc(),
diag::err_unimplemented_conversion_with_fixed_point_type)
<< SrcTy;
return CK_IntegralCast;
}
llvm_unreachable("Should have returned before this");
}
llvm_unreachable("Unhandled scalar cast");
}
static bool breakDownVectorType(QualType type, uint64_t &len,
QualType &eltType) {
// Vectors are simple.
if (const VectorType *vecType = type->getAs<VectorType>()) {
len = vecType->getNumElements();
eltType = vecType->getElementType();
assert(eltType->isScalarType());
return true;
}
// We allow lax conversion to and from non-vector types, but only if
// they're real types (i.e. non-complex, non-pointer scalar types).
if (!type->isRealType()) return false;
len = 1;
eltType = type;
return true;
}
bool Sema::isValidSveBitcast(QualType srcTy, QualType destTy) {
assert(srcTy->isVectorType() || destTy->isVectorType());
auto ValidScalableConversion = [](QualType FirstType, QualType SecondType) {
if (!FirstType->isSVESizelessBuiltinType())
return false;
const auto *VecTy = SecondType->getAs<VectorType>();
return VecTy && VecTy->getVectorKind() == VectorKind::SveFixedLengthData;
};
return ValidScalableConversion(srcTy, destTy) ||
ValidScalableConversion(destTy, srcTy);
}
bool Sema::areMatrixTypesOfTheSameDimension(QualType srcTy, QualType destTy) {
if (!destTy->isMatrixType() || !srcTy->isMatrixType())
return false;
const ConstantMatrixType *matSrcType = srcTy->getAs<ConstantMatrixType>();
const ConstantMatrixType *matDestType = destTy->getAs<ConstantMatrixType>();
return matSrcType->getNumRows() == matDestType->getNumRows() &&
matSrcType->getNumColumns() == matDestType->getNumColumns();
}
bool Sema::areVectorTypesSameSize(QualType SrcTy, QualType DestTy) {
assert(DestTy->isVectorType() || SrcTy->isVectorType());
uint64_t SrcLen, DestLen;
QualType SrcEltTy, DestEltTy;
if (!breakDownVectorType(SrcTy, SrcLen, SrcEltTy))
return false;
if (!breakDownVectorType(DestTy, DestLen, DestEltTy))
return false;
// ASTContext::getTypeSize will return the size rounded up to a
// power of 2, so instead of using that, we need to use the raw
// element size multiplied by the element count.
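// For example, a vector of three 32-bit elements holds 96 bits of data,
// but getTypeSize on the vector type itself would report 128.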
uint64_t SrcEltSize = Context.getTypeSize(SrcEltTy);
uint64_t DestEltSize = Context.getTypeSize(DestEltTy);
return (SrcLen * SrcEltSize == DestLen * DestEltSize);
}
bool Sema::anyAltivecTypes(QualType SrcTy, QualType DestTy) {
assert((DestTy->isVectorType() || SrcTy->isVectorType()) &&
"expected at least one type to be a vector here");
bool IsSrcTyAltivec =
SrcTy->isVectorType() && ((SrcTy->castAs<VectorType>()->getVectorKind() ==
VectorKind::AltiVecVector) ||
(SrcTy->castAs<VectorType>()->getVectorKind() ==
VectorKind::AltiVecBool) ||
(SrcTy->castAs<VectorType>()->getVectorKind() ==
VectorKind::AltiVecPixel));
bool IsDestTyAltivec = DestTy->isVectorType() &&
((DestTy->castAs<VectorType>()->getVectorKind() ==
VectorKind::AltiVecVector) ||
(DestTy->castAs<VectorType>()->getVectorKind() ==
VectorKind::AltiVecBool) ||
(DestTy->castAs<VectorType>()->getVectorKind() ==
VectorKind::AltiVecPixel));
return (IsSrcTyAltivec || IsDestTyAltivec);
}
bool Sema::areLaxCompatibleVectorTypes(QualType srcTy, QualType destTy) {
assert(destTy->isVectorType() || srcTy->isVectorType());
// Disallow lax conversions between scalars and ExtVectors (these
// conversions are allowed for other vector types because common headers
// depend on them). Most scalar OP ExtVector cases are handled by the
// splat path anyway, which does what we want (convert, not bitcast).
// What this rules out for ExtVectors is crazy things like char4*float.
if (srcTy->isScalarType() && destTy->isExtVectorType()) return false;
if (destTy->isScalarType() && srcTy->isExtVectorType()) return false;
return areVectorTypesSameSize(srcTy, destTy);
}
bool Sema::isLaxVectorConversion(QualType srcTy, QualType destTy) {
assert(destTy->isVectorType() || srcTy->isVectorType());
switch (Context.getLangOpts().getLaxVectorConversions()) {
case LangOptions::LaxVectorConversionKind::None:
return false;
case LangOptions::LaxVectorConversionKind::Integer:
if (!srcTy->isIntegralOrEnumerationType()) {
auto *Vec = srcTy->getAs<VectorType>();
if (!Vec || !Vec->getElementType()->isIntegralOrEnumerationType())
return false;
}
if (!destTy->isIntegralOrEnumerationType()) {
auto *Vec = destTy->getAs<VectorType>();
if (!Vec || !Vec->getElementType()->isIntegralOrEnumerationType())
return false;
}
// OK, integer (vector) -> integer (vector) bitcast.
break;
case LangOptions::LaxVectorConversionKind::All:
break;
}
return areLaxCompatibleVectorTypes(srcTy, destTy);
}
bool Sema::CheckMatrixCast(SourceRange R, QualType DestTy, QualType SrcTy,
CastKind &Kind) {
if (SrcTy->isMatrixType() && DestTy->isMatrixType()) {
if (!areMatrixTypesOfTheSameDimension(SrcTy, DestTy)) {
return Diag(R.getBegin(), diag::err_invalid_conversion_between_matrixes)
<< DestTy << SrcTy << R;
}
} else if (SrcTy->isMatrixType()) {
return Diag(R.getBegin(),
diag::err_invalid_conversion_between_matrix_and_type)
<< SrcTy << DestTy << R;
} else if (DestTy->isMatrixType()) {
return Diag(R.getBegin(),
diag::err_invalid_conversion_between_matrix_and_type)
<< DestTy << SrcTy << R;
}
Kind = CK_MatrixCast;
return false;
}
bool Sema::CheckVectorCast(SourceRange R, QualType VectorTy, QualType Ty,
CastKind &Kind) {
assert(VectorTy->isVectorType() && "Not a vector type!");
if (Ty->isVectorType() || Ty->isIntegralType(Context)) {
if (!areLaxCompatibleVectorTypes(Ty, VectorTy))
return Diag(R.getBegin(),
Ty->isVectorType() ?
diag::err_invalid_conversion_between_vectors :
diag::err_invalid_conversion_between_vector_and_integer)
<< VectorTy << Ty << R;
} else
return Diag(R.getBegin(),
diag::err_invalid_conversion_between_vector_and_scalar)
<< VectorTy << Ty << R;
Kind = CK_BitCast;
return false;
}
ExprResult Sema::prepareVectorSplat(QualType VectorTy, Expr *SplattedExpr) {
QualType DestElemTy = VectorTy->castAs<VectorType>()->getElementType();
if (DestElemTy == SplattedExpr->getType())
return SplattedExpr;
assert(DestElemTy->isFloatingType() ||
DestElemTy->isIntegralOrEnumerationType());
CastKind CK;
if (VectorTy->isExtVectorType() && SplattedExpr->getType()->isBooleanType()) {
// OpenCL requires that we convert `true` boolean expressions to -1, but
// only when splatting vectors.
if (DestElemTy->isFloatingType()) {
// To avoid having to have a CK_BooleanToSignedFloating cast kind, we cast
// in two steps: boolean to signed integral, then to floating.
ExprResult CastExprRes = ImpCastExprToType(SplattedExpr, Context.IntTy,
CK_BooleanToSignedIntegral);
SplattedExpr = CastExprRes.get();
CK = CK_IntegralToFloating;
} else {
CK = CK_BooleanToSignedIntegral;
}
} else {
ExprResult CastExprRes = SplattedExpr;
CK = PrepareScalarCast(CastExprRes, DestElemTy);
if (CastExprRes.isInvalid())
return ExprError();
SplattedExpr = CastExprRes.get();
}
return ImpCastExprToType(SplattedExpr, DestElemTy, CK);
}
ExprResult Sema::CheckExtVectorCast(SourceRange R, QualType DestTy,
Expr *CastExpr, CastKind &Kind) {
assert(DestTy->isExtVectorType() && "Not an extended vector type!");
QualType SrcTy = CastExpr->getType();
// If SrcTy is a VectorType, the total size must match to explicitly cast to
// an ExtVectorType.
// In OpenCL, casts between vectors of different types are not allowed.
// (See OpenCL 6.2).
if (SrcTy->isVectorType()) {
if (!areLaxCompatibleVectorTypes(SrcTy, DestTy) ||
(getLangOpts().OpenCL &&
!Context.hasSameUnqualifiedType(DestTy, SrcTy))) {
Diag(R.getBegin(), diag::err_invalid_conversion_between_ext_vectors)
<< DestTy << SrcTy << R;
return ExprError();
}
Kind = CK_BitCast;
return CastExpr;
}
// All non-pointer scalars can be cast to ExtVector type. The appropriate
// conversion will take place first from scalar to elt type, and then
// splat from elt type to vector.
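// For example, '(float4)2' first converts 2 to 2.0f, then splats that
// value into all four elements.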
if (SrcTy->isPointerType())
return Diag(R.getBegin(),
diag::err_invalid_conversion_between_vector_and_scalar)
<< DestTy << SrcTy << R;
Kind = CK_VectorSplat;
return prepareVectorSplat(DestTy, CastExpr);
}
ExprResult
Sema::ActOnCastExpr(Scope *S, SourceLocation LParenLoc,
Declarator &D, ParsedType &Ty,
SourceLocation RParenLoc, Expr *CastExpr) {
assert(!D.isInvalidType() && (CastExpr != nullptr) &&
"ActOnCastExpr(): missing type or expr");
TypeSourceInfo *castTInfo = GetTypeForDeclaratorCast(D, CastExpr->getType());
if (D.isInvalidType())
return ExprError();
if (getLangOpts().CPlusPlus) {
// Check that there are no default arguments (C++ only).
CheckExtraCXXDefaultArguments(D);
} else {
// Make sure any TypoExprs have been dealt with.
ExprResult Res = CorrectDelayedTyposInExpr(CastExpr);
if (!Res.isUsable())
return ExprError();
CastExpr = Res.get();
}
checkUnusedDeclAttributes(D);
QualType castType = castTInfo->getType();
Ty = CreateParsedType(castType, castTInfo);
bool isVectorLiteral = false;
// Check for an AltiVec or OpenCL vector literal,
// i.e. all the elements are integer constants.
ParenExpr *PE = dyn_cast<ParenExpr>(CastExpr);
ParenListExpr *PLE = dyn_cast<ParenListExpr>(CastExpr);
if ((getLangOpts().AltiVec || getLangOpts().ZVector || getLangOpts().OpenCL)
&& castType->isVectorType() && (PE || PLE)) {
if (PLE && PLE->getNumExprs() == 0) {
Diag(PLE->getExprLoc(), diag::err_altivec_empty_initializer);
return ExprError();
}
if (PE || PLE->getNumExprs() == 1) {
Expr *E = (PE ? PE->getSubExpr() : PLE->getExpr(0));
if (!E->isTypeDependent() && !E->getType()->isVectorType())
isVectorLiteral = true;
}
else
isVectorLiteral = true;
}
// If this is a vector initializer, '(' type ')' '(' init, ..., init ')'
// then handle it as such.
if (isVectorLiteral)
return BuildVectorLiteral(LParenLoc, RParenLoc, CastExpr, castTInfo);
// If the Expr being casted is a ParenListExpr, handle it specially.
// This is not an AltiVec-style cast, so turn the ParenListExpr into a
// sequence of BinOp comma operators.
if (isa<ParenListExpr>(CastExpr)) {
ExprResult Result = MaybeConvertParenListExprToParenExpr(S, CastExpr);
if (Result.isInvalid()) return ExprError();
CastExpr = Result.get();
}
if (getLangOpts().CPlusPlus && !castType->isVoidType())
Diag(LParenLoc, diag::warn_old_style_cast) << CastExpr->getSourceRange();
ObjC().CheckTollFreeBridgeCast(castType, CastExpr);
ObjC().CheckObjCBridgeRelatedCast(castType, CastExpr);
DiscardMisalignedMemberAddress(castType.getTypePtr(), CastExpr);
return BuildCStyleCastExpr(LParenLoc, castTInfo, RParenLoc, CastExpr);
}
ExprResult Sema::BuildVectorLiteral(SourceLocation LParenLoc,
SourceLocation RParenLoc, Expr *E,
TypeSourceInfo *TInfo) {
assert((isa<ParenListExpr>(E) || isa<ParenExpr>(E)) &&
"Expected paren or paren list expression");
Expr **exprs;
unsigned numExprs;
Expr *subExpr;
SourceLocation LiteralLParenLoc, LiteralRParenLoc;
if (ParenListExpr *PE = dyn_cast<ParenListExpr>(E)) {
LiteralLParenLoc = PE->getLParenLoc();
LiteralRParenLoc = PE->getRParenLoc();
exprs = PE->getExprs();
numExprs = PE->getNumExprs();
} else { // isa<ParenExpr> by the assertion at function entry
LiteralLParenLoc = cast<ParenExpr>(E)->getLParen();
LiteralRParenLoc = cast<ParenExpr>(E)->getRParen();
subExpr = cast<ParenExpr>(E)->getSubExpr();
exprs = &subExpr;
numExprs = 1;
}
QualType Ty = TInfo->getType();
assert(Ty->isVectorType() && "Expected vector type");
SmallVector<Expr *, 8> initExprs;
const VectorType *VTy = Ty->castAs<VectorType>();
unsigned numElems = VTy->getNumElements();
// '(...)' form of vector initialization in AltiVec: the number of
// initializers must be one or must match the size of the vector.
// If a single value is specified in the initializer then it will be
// replicated to all the components of the vector.
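// For example, '(vector int)(1)' replicates 1 into all four elements,
// while '(vector int)(1, 2, 3, 4)' initializes them individually.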
if (CheckAltivecInitFromScalar(E->getSourceRange(), Ty,
VTy->getElementType()))
return ExprError();
if (ShouldSplatAltivecScalarInCast(VTy)) {
// The number of initializers must be one or must match the size of the
// vector. If a single value is specified in the initializer then it will
// be replicated to all the components of the vector
if (numExprs == 1) {
QualType ElemTy = VTy->getElementType();
ExprResult Literal = DefaultLvalueConversion(exprs[0]);
if (Literal.isInvalid())
return ExprError();
Literal = ImpCastExprToType(Literal.get(), ElemTy,
PrepareScalarCast(Literal, ElemTy));
return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get());
} else if (numExprs < numElems) {
Diag(E->getExprLoc(),
diag::err_incorrect_number_of_vector_initializers);
return ExprError();
} else {
initExprs.append(exprs, exprs + numExprs);
}
} else {
// For OpenCL, when the number of initializers is a single value,
// it will be replicated to all components of the vector.
if (getLangOpts().OpenCL && VTy->getVectorKind() == VectorKind::Generic &&
numExprs == 1) {
QualType ElemTy = VTy->getElementType();
ExprResult Literal = DefaultLvalueConversion(exprs[0]);
if (Literal.isInvalid())
return ExprError();
Literal = ImpCastExprToType(Literal.get(), ElemTy,
PrepareScalarCast(Literal, ElemTy));
return BuildCStyleCastExpr(LParenLoc, TInfo, RParenLoc, Literal.get());
}
initExprs.append(exprs, exprs + numExprs);
}
// FIXME: This means that pretty-printing the final AST will produce curly
// braces instead of the original commas.
InitListExpr *initE = new (Context) InitListExpr(Context, LiteralLParenLoc,
initExprs, LiteralRParenLoc);
initE->setType(Ty);
return BuildCompoundLiteralExpr(LParenLoc, TInfo, RParenLoc, initE);
}
ExprResult
Sema::MaybeConvertParenListExprToParenExpr(Scope *S, Expr *OrigExpr) {
ParenListExpr *E = dyn_cast<ParenListExpr>(OrigExpr);
if (!E)
return OrigExpr;
ExprResult Result(E->getExpr(0));
for (unsigned i = 1, e = E->getNumExprs(); i != e && !Result.isInvalid(); ++i)
Result = ActOnBinOp(S, E->getExprLoc(), tok::comma, Result.get(),
E->getExpr(i));
if (Result.isInvalid()) return ExprError();
return ActOnParenExpr(E->getLParenLoc(), E->getRParenLoc(), Result.get());
}
ExprResult Sema::ActOnParenListExpr(SourceLocation L,
SourceLocation R,
MultiExprArg Val) {
return ParenListExpr::Create(Context, L, Val, R);
}
bool Sema::DiagnoseConditionalForNull(const Expr *LHSExpr, const Expr *RHSExpr,
SourceLocation QuestionLoc) {
const Expr *NullExpr = LHSExpr;
const Expr *NonPointerExpr = RHSExpr;
Expr::NullPointerConstantKind NullKind =
NullExpr->isNullPointerConstant(Context,
Expr::NPC_ValueDependentIsNotNull);
if (NullKind == Expr::NPCK_NotNull) {
NullExpr = RHSExpr;
NonPointerExpr = LHSExpr;
NullKind =
NullExpr->isNullPointerConstant(Context,
Expr::NPC_ValueDependentIsNotNull);
}
if (NullKind == Expr::NPCK_NotNull)
return false;
if (NullKind == Expr::NPCK_ZeroExpression)
return false;
if (NullKind == Expr::NPCK_ZeroLiteral) {
// In this case, check to make sure that we got here from a "NULL"
// string in the source code.
NullExpr = NullExpr->IgnoreParenImpCasts();
SourceLocation loc = NullExpr->getExprLoc();
if (!findMacroSpelling(loc, "NULL"))
return false;
}
int DiagType = (NullKind == Expr::NPCK_CXX11_nullptr);
Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands_null)
<< NonPointerExpr->getType() << DiagType
<< NonPointerExpr->getSourceRange();
return true;
}
/// Return false if the condition expression is valid, true otherwise.
static bool checkCondition(Sema &S, const Expr *Cond,
SourceLocation QuestionLoc) {
QualType CondTy = Cond->getType();
// OpenCL v1.1 s6.3.i says the condition cannot be a floating point type.
if (S.getLangOpts().OpenCL && CondTy->isFloatingType()) {
S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat)
<< CondTy << Cond->getSourceRange();
return true;
}
// C99 6.5.15p2
if (CondTy->isScalarType()) return false;
S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_scalar)
<< CondTy << Cond->getSourceRange();
return true;
}
/// Return false if the NullExpr can be promoted to PointerTy,
/// true otherwise.
static bool checkConditionalNullPointer(Sema &S, ExprResult &NullExpr,
QualType PointerTy) {
if ((!PointerTy->isAnyPointerType() && !PointerTy->isBlockPointerType()) ||
!NullExpr.get()->isNullPointerConstant(S.Context,
Expr::NPC_ValueDependentIsNull))
return true;
NullExpr = S.ImpCastExprToType(NullExpr.get(), PointerTy, CK_NullToPointer);
return false;
}
/// Checks compatibility between two pointers and return the resulting
/// type.
static QualType checkConditionalPointerCompatibility(Sema &S, ExprResult &LHS,
ExprResult &RHS,
SourceLocation Loc) {
QualType LHSTy = LHS.get()->getType();
QualType RHSTy = RHS.get()->getType();
if (S.Context.hasSameType(LHSTy, RHSTy)) {
// Two identical pointer types are always compatible.
return S.Context.getCommonSugaredType(LHSTy, RHSTy);
}
QualType lhptee, rhptee;
// Get the pointee types.
bool IsBlockPointer = false;
if (const BlockPointerType *LHSBTy = LHSTy->getAs<BlockPointerType>()) {
lhptee = LHSBTy->getPointeeType();
rhptee = RHSTy->castAs<BlockPointerType>()->getPointeeType();
IsBlockPointer = true;
} else {
lhptee = LHSTy->castAs<PointerType>()->getPointeeType();
rhptee = RHSTy->castAs<PointerType>()->getPointeeType();
}
// C99 6.5.15p6: If both operands are pointers to compatible types or to
// differently qualified versions of compatible types, the result type is
// a pointer to an appropriately qualified version of the composite
// type.
// Only CVR-qualifiers exist in the standard, and the differently-qualified
// clause doesn't make sense for our extensions. E.g. address space 2 should
// be incompatible with address space 3: they may live on different devices or
// anything.
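// For example, merging 'const int *' and 'volatile int *' yields
// 'const volatile int *' as the composite type.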
Qualifiers lhQual = lhptee.getQualifiers();
Qualifiers rhQual = rhptee.getQualifiers();
LangAS ResultAddrSpace = LangAS::Default;
LangAS LAddrSpace = lhQual.getAddressSpace();
LangAS RAddrSpace = rhQual.getAddressSpace();
// OpenCL v1.1 s6.5 - Conversion between pointers to distinct address
// spaces is disallowed.
if (lhQual.isAddressSpaceSupersetOf(rhQual))
ResultAddrSpace = LAddrSpace;
else if (rhQual.isAddressSpaceSupersetOf(lhQual))
ResultAddrSpace = RAddrSpace;
else {
S.Diag(Loc, diag::err_typecheck_op_on_nonoverlapping_address_space_pointers)
<< LHSTy << RHSTy << 2 << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
unsigned MergedCVRQual = lhQual.getCVRQualifiers() | rhQual.getCVRQualifiers();
auto LHSCastKind = CK_BitCast, RHSCastKind = CK_BitCast;
lhQual.removeCVRQualifiers();
rhQual.removeCVRQualifiers();
// OpenCL v2.0 specification doesn't extend compatibility of type qualifiers
// (C99 6.7.3) for address spaces. We assume that the check should behave in
// the same manner as it's defined for CVR qualifiers, so for OpenCL two
// qualified types are compatible iff
// * the corresponding types are compatible
// * CVR qualifiers are equal
// * address spaces are equal
// Thus for conditional operator we merge CVR and address space unqualified
// pointees and if there is a composite type we return a pointer to it with
// merged qualifiers.
LHSCastKind =
LAddrSpace == ResultAddrSpace ? CK_BitCast : CK_AddressSpaceConversion;
RHSCastKind =
RAddrSpace == ResultAddrSpace ? CK_BitCast : CK_AddressSpaceConversion;
lhQual.removeAddressSpace();
rhQual.removeAddressSpace();
lhptee = S.Context.getQualifiedType(lhptee.getUnqualifiedType(), lhQual);
rhptee = S.Context.getQualifiedType(rhptee.getUnqualifiedType(), rhQual);
QualType CompositeTy = S.Context.mergeTypes(
lhptee, rhptee, /*OfBlockPointer=*/false, /*Unqualified=*/false,
/*BlockReturnType=*/false, /*IsConditionalOperator=*/true);
if (CompositeTy.isNull()) {
// In this situation, we assume void* type. There is no especially good
// reason, but this is what GCC does, and we do have to pick something
// to get a consistent AST.
QualType incompatTy;
incompatTy = S.Context.getPointerType(
S.Context.getAddrSpaceQualType(S.Context.VoidTy, ResultAddrSpace));
LHS = S.ImpCastExprToType(LHS.get(), incompatTy, LHSCastKind);
RHS = S.ImpCastExprToType(RHS.get(), incompatTy, RHSCastKind);
// FIXME: For OpenCL the warning emission and cast to void* leaves room
// for casts between types with incompatible address space qualifiers.
// For the following code the compiler produces casts between global and
// local address spaces of the corresponding innermost pointees:
// local int *global *a;
// global int *global *b;
// a = (0 ? a : b); // see C99 6.5.16.1.p1.
S.Diag(Loc, diag::ext_typecheck_cond_incompatible_pointers)
<< LHSTy << RHSTy << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return incompatTy;
}
// The pointer types are compatible.
// In case of OpenCL ResultTy should have the address space qualifier
// which is a superset of address spaces of both the 2nd and the 3rd
// operands of the conditional operator.
QualType ResultTy = [&, ResultAddrSpace]() {
if (S.getLangOpts().OpenCL) {
Qualifiers CompositeQuals = CompositeTy.getQualifiers();
CompositeQuals.setAddressSpace(ResultAddrSpace);
return S.Context
.getQualifiedType(CompositeTy.getUnqualifiedType(), CompositeQuals)
.withCVRQualifiers(MergedCVRQual);
}
return CompositeTy.withCVRQualifiers(MergedCVRQual);
}();
if (IsBlockPointer)
ResultTy = S.Context.getBlockPointerType(ResultTy);
else
ResultTy = S.Context.getPointerType(ResultTy);
LHS = S.ImpCastExprToType(LHS.get(), ResultTy, LHSCastKind);
RHS = S.ImpCastExprToType(RHS.get(), ResultTy, RHSCastKind);
return ResultTy;
}
/// Return the resulting type when the operands are both block pointers.
static QualType checkConditionalBlockPointerCompatibility(Sema &S,
ExprResult &LHS,
ExprResult &RHS,
SourceLocation Loc) {
QualType LHSTy = LHS.get()->getType();
QualType RHSTy = RHS.get()->getType();
if (!LHSTy->isBlockPointerType() || !RHSTy->isBlockPointerType()) {
if (LHSTy->isVoidPointerType() || RHSTy->isVoidPointerType()) {
QualType destType = S.Context.getPointerType(S.Context.VoidTy);
LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast);
RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast);
return destType;
}
S.Diag(Loc, diag::err_typecheck_cond_incompatible_operands)
<< LHSTy << RHSTy << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
// We have 2 block pointer types.
return checkConditionalPointerCompatibility(S, LHS, RHS, Loc);
}
/// Return the resulting type when the operands are both pointers.
static QualType
checkConditionalObjectPointersCompatibility(Sema &S, ExprResult &LHS,
ExprResult &RHS,
SourceLocation Loc) {
// get the pointer types
QualType LHSTy = LHS.get()->getType();
QualType RHSTy = RHS.get()->getType();
// get the "pointed to" types
QualType lhptee = LHSTy->castAs<PointerType>()->getPointeeType();
QualType rhptee = RHSTy->castAs<PointerType>()->getPointeeType();
// ignore qualifiers on void (C99 6.5.15p3, clause 6)
if (lhptee->isVoidType() && rhptee->isIncompleteOrObjectType()) {
// Figure out necessary qualifiers (C99 6.5.15p6)
QualType destPointee
= S.Context.getQualifiedType(lhptee, rhptee.getQualifiers());
QualType destType = S.Context.getPointerType(destPointee);
// Add qualifiers if necessary.
LHS = S.ImpCastExprToType(LHS.get(), destType, CK_NoOp);
// Promote to void*.
RHS = S.ImpCastExprToType(RHS.get(), destType, CK_BitCast);
return destType;
}
if (rhptee->isVoidType() && lhptee->isIncompleteOrObjectType()) {
QualType destPointee
= S.Context.getQualifiedType(rhptee, lhptee.getQualifiers());
QualType destType = S.Context.getPointerType(destPointee);
// Add qualifiers if necessary.
RHS = S.ImpCastExprToType(RHS.get(), destType, CK_NoOp);
// Promote to void*.
LHS = S.ImpCastExprToType(LHS.get(), destType, CK_BitCast);
return destType;
}
return checkConditionalPointerCompatibility(S, LHS, RHS, Loc);
}
/// Return false if the first expression is not an integer and the second
/// expression is not a pointer, true otherwise.
static bool checkPointerIntegerMismatch(Sema &S, ExprResult &Int,
Expr* PointerExpr, SourceLocation Loc,
bool IsIntFirstExpr) {
if (!PointerExpr->getType()->isPointerType() ||
!Int.get()->getType()->isIntegerType())
return false;
Expr *Expr1 = IsIntFirstExpr ? Int.get() : PointerExpr;
Expr *Expr2 = IsIntFirstExpr ? PointerExpr : Int.get();
S.Diag(Loc, diag::ext_typecheck_cond_pointer_integer_mismatch)
<< Expr1->getType() << Expr2->getType()
<< Expr1->getSourceRange() << Expr2->getSourceRange();
Int = S.ImpCastExprToType(Int.get(), PointerExpr->getType(),
CK_IntegralToPointer);
return true;
}
/// Simple conversion between integer and floating point types.
///
/// Used when handling the OpenCL conditional operator where the
/// condition is a vector while the other operands are scalar.
///
/// OpenCL v1.1 s6.3.i and s6.11.6 together require that the scalar
/// types are either integer or floating type. Between the two
/// operands, the type with the higher rank is defined as the "result
/// type". The other operand needs to be promoted to the same type. No
/// other type promotion is allowed. We cannot use
/// UsualArithmeticConversions() for this purpose, since it always
/// promotes promotable types.
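///
/// For example, given 'short' and 'float' operands the result type is
/// 'float'; the 'short' operand is converted directly to 'float' rather
/// than first being promoted to 'int'.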
static QualType OpenCLArithmeticConversions(Sema &S, ExprResult &LHS,
ExprResult &RHS,
SourceLocation QuestionLoc) {
LHS = S.DefaultFunctionArrayLvalueConversion(LHS.get());
if (LHS.isInvalid())
return QualType();
RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get());
if (RHS.isInvalid())
return QualType();
// For conversion purposes, we ignore any qualifiers.
// For example, "const float" and "float" are equivalent.
QualType LHSType =
S.Context.getCanonicalType(LHS.get()->getType()).getUnqualifiedType();
QualType RHSType =
S.Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType();
if (!LHSType->isIntegerType() && !LHSType->isRealFloatingType()) {
S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float)
<< LHSType << LHS.get()->getSourceRange();
return QualType();
}
if (!RHSType->isIntegerType() && !RHSType->isRealFloatingType()) {
S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_int_float)
<< RHSType << RHS.get()->getSourceRange();
return QualType();
}
// If both types are identical, no conversion is needed.
if (LHSType == RHSType)
return LHSType;
// Now handle "real" floating types (i.e. float, double, long double).
if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType())
return handleFloatConversion(S, LHS, RHS, LHSType, RHSType,
/*IsCompAssign = */ false);
// Finally, we have two differing integer types.
return handleIntegerConversion<doIntegralCast, doIntegralCast>
(S, LHS, RHS, LHSType, RHSType, /*IsCompAssign = */ false);
}
/// Convert scalar operands to a vector that matches the
/// condition in length.
///
/// Used when handling the OpenCL conditional operator where the
/// condition is a vector while the other operands are scalar.
///
/// We first compute the "result type" for the scalar operands
/// according to OpenCL v1.1 s6.3.i. Both operands are then converted
/// into a vector of that type where the length matches the condition
/// vector type. s6.11.6 requires that the element types of the result
/// and the condition must have the same number of bits.
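///
/// For example, with an 'int4' condition and scalar 'int' operands, both
/// operands are splatted to an 'int4' result vector.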
static QualType
OpenCLConvertScalarsToVectors(Sema &S, ExprResult &LHS, ExprResult &RHS,
QualType CondTy, SourceLocation QuestionLoc) {
QualType ResTy = OpenCLArithmeticConversions(S, LHS, RHS, QuestionLoc);
if (ResTy.isNull()) return QualType();
const VectorType *CV = CondTy->getAs<VectorType>();
assert(CV);
// Determine the vector result type
unsigned NumElements = CV->getNumElements();
QualType VectorTy = S.Context.getExtVectorType(ResTy, NumElements);
// Ensure that all types have the same number of bits
if (S.Context.getTypeSize(CV->getElementType())
!= S.Context.getTypeSize(ResTy)) {
// Since VectorTy is created internally, it does not pretty print
// with an OpenCL name. Instead, we just print a description.
std::string EleTyName = ResTy.getUnqualifiedType().getAsString();
SmallString<64> Str;
llvm::raw_svector_ostream OS(Str);
OS << "(vector of " << NumElements << " '" << EleTyName << "' values)";
S.Diag(QuestionLoc, diag::err_conditional_vector_element_size)
<< CondTy << OS.str();
return QualType();
}
// Convert operands to the vector result type
LHS = S.ImpCastExprToType(LHS.get(), VectorTy, CK_VectorSplat);
RHS = S.ImpCastExprToType(RHS.get(), VectorTy, CK_VectorSplat);
return VectorTy;
}
/// Return false if this is a valid OpenCL condition vector
static bool checkOpenCLConditionVector(Sema &S, Expr *Cond,
SourceLocation QuestionLoc) {
// OpenCL v1.1 s6.11.6 says the elements of the vector must be of
// integral type.
const VectorType *CondTy = Cond->getType()->getAs<VectorType>();
assert(CondTy);
QualType EleTy = CondTy->getElementType();
if (EleTy->isIntegerType()) return false;
S.Diag(QuestionLoc, diag::err_typecheck_cond_expect_nonfloat)
<< Cond->getType() << Cond->getSourceRange();
return true;
}
/// Return false if the vector condition type and the vector
/// result type are compatible.
///
/// OpenCL v1.1 s6.11.6 requires that both vector types have the same
/// number of elements, and their element types have the same number
/// of bits.
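///
/// For example, an 'int4' condition with a 'float4' result is accepted
/// (both have four elements of 32 bits each), while 'short4' with
/// 'float4' is diagnosed.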
static bool checkVectorResult(Sema &S, QualType CondTy, QualType VecResTy,
SourceLocation QuestionLoc) {
const VectorType *CV = CondTy->getAs<VectorType>();
const VectorType *RV = VecResTy->getAs<VectorType>();
assert(CV && RV);
if (CV->getNumElements() != RV->getNumElements()) {
S.Diag(QuestionLoc, diag::err_conditional_vector_size)
<< CondTy << VecResTy;
return true;
}
QualType CVE = CV->getElementType();
QualType RVE = RV->getElementType();
if (S.Context.getTypeSize(CVE) != S.Context.getTypeSize(RVE)) {
S.Diag(QuestionLoc, diag::err_conditional_vector_element_size)
<< CondTy << VecResTy;
return true;
}
return false;
}
/// Return the resulting type for the conditional operator in
/// OpenCL (aka "ternary selection operator", OpenCL v1.1
/// s6.3.i) when the condition is a vector type.
static QualType
OpenCLCheckVectorConditional(Sema &S, ExprResult &Cond,
ExprResult &LHS, ExprResult &RHS,
SourceLocation QuestionLoc) {
Cond = S.DefaultFunctionArrayLvalueConversion(Cond.get());
if (Cond.isInvalid())
return QualType();
QualType CondTy = Cond.get()->getType();
if (checkOpenCLConditionVector(S, Cond.get(), QuestionLoc))
return QualType();
// If either operand is a vector then find the vector type of the
// result as specified in OpenCL v1.1 s6.3.i.
if (LHS.get()->getType()->isVectorType() ||
RHS.get()->getType()->isVectorType()) {
bool IsBoolVecLang =
!S.getLangOpts().OpenCL && !S.getLangOpts().OpenCLCPlusPlus;
QualType VecResTy =
S.CheckVectorOperands(LHS, RHS, QuestionLoc,
/*isCompAssign*/ false,
/*AllowBothBool*/ true,
/*AllowBoolConversions*/ false,
/*AllowBooleanOperation*/ IsBoolVecLang,
/*ReportInvalid*/ true);
if (VecResTy.isNull())
return QualType();
// The result type must match the condition type as specified in
// OpenCL v1.1 s6.11.6.
if (checkVectorResult(S, CondTy, VecResTy, QuestionLoc))
return QualType();
return VecResTy;
}
// Both operands are scalar.
return OpenCLConvertScalarsToVectors(S, LHS, RHS, CondTy, QuestionLoc);
}
/// Return true if the Expr is block type
static bool checkBlockType(Sema &S, const Expr *E) {
if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
QualType Ty = CE->getCallee()->getType();
if (Ty->isBlockPointerType()) {
S.Diag(E->getExprLoc(), diag::err_opencl_ternary_with_block);
return true;
}
}
return false;
}
/// Note that LHS is not null here, even if this is the GNU "x ?: y" extension.
/// In that case, LHS = cond.
/// C99 6.5.15
QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
ExprResult &RHS, ExprValueKind &VK,
ExprObjectKind &OK,
SourceLocation QuestionLoc) {
ExprResult LHSResult = CheckPlaceholderExpr(LHS.get());
if (!LHSResult.isUsable()) return QualType();
LHS = LHSResult;
ExprResult RHSResult = CheckPlaceholderExpr(RHS.get());
if (!RHSResult.isUsable()) return QualType();
RHS = RHSResult;
// C++ is sufficiently different to merit its own checker.
if (getLangOpts().CPlusPlus)
return CXXCheckConditionalOperands(Cond, LHS, RHS, VK, OK, QuestionLoc);
VK = VK_PRValue;
OK = OK_Ordinary;
if (Context.isDependenceAllowed() &&
(Cond.get()->isTypeDependent() || LHS.get()->isTypeDependent() ||
RHS.get()->isTypeDependent())) {
assert(!getLangOpts().CPlusPlus);
assert((Cond.get()->containsErrors() || LHS.get()->containsErrors() ||
RHS.get()->containsErrors()) &&
"should only occur in error-recovery path.");
return Context.DependentTy;
}
// The OpenCL operator with a vector condition is sufficiently
// different to merit its own checker.
if ((getLangOpts().OpenCL && Cond.get()->getType()->isVectorType()) ||
Cond.get()->getType()->isExtVectorType())
return OpenCLCheckVectorConditional(*this, Cond, LHS, RHS, QuestionLoc);
// First, check the condition.
Cond = UsualUnaryConversions(Cond.get());
if (Cond.isInvalid())
return QualType();
if (checkCondition(*this, Cond.get(), QuestionLoc))
return QualType();
// Handle vectors.
if (LHS.get()->getType()->isVectorType() ||
RHS.get()->getType()->isVectorType())
return CheckVectorOperands(LHS, RHS, QuestionLoc, /*isCompAssign*/ false,
/*AllowBothBool*/ true,
/*AllowBoolConversions*/ false,
/*AllowBooleanOperation*/ false,
/*ReportInvalid*/ true);
QualType ResTy =
UsualArithmeticConversions(LHS, RHS, QuestionLoc, ACK_Conditional);
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
// WebAssembly tables are not allowed as conditional LHS or RHS.
QualType LHSTy = LHS.get()->getType();
QualType RHSTy = RHS.get()->getType();
if (LHSTy->isWebAssemblyTableType() || RHSTy->isWebAssemblyTableType()) {
Diag(QuestionLoc, diag::err_wasm_table_conditional_expression)
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return QualType();
}
// Diagnose attempts to convert between __ibm128, __float128 and long double
// where such conversions currently can't be handled.
if (unsupportedTypeConversion(*this, LHSTy, RHSTy)) {
Diag(QuestionLoc,
diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return QualType();
}
// OpenCL v2.0 s6.12.5 - Blocks cannot be used as expressions of the ternary
// selection operator (?:).
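  // Note that '|' (not '||') is used below so that both operands are
  // evaluated, letting checkBlockType diagnose each offending block
  // expression rather than stopping at the first one.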
if (getLangOpts().OpenCL &&
      ((int)checkBlockType(*this, LHS.get()) |
       (int)checkBlockType(*this, RHS.get()))) {
return QualType();
}
// If both operands have arithmetic type, do the usual arithmetic conversions
// to find a common type: C99 6.5.15p3,5.
if (LHSTy->isArithmeticType() && RHSTy->isArithmeticType()) {
// Disallow invalid arithmetic conversions, such as those between bit-
// precise integers types of different sizes, or between a bit-precise
// integer and another type.
if (ResTy.isNull() && (LHSTy->isBitIntType() || RHSTy->isBitIntType())) {
Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands)
<< LHSTy << RHSTy << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
LHS = ImpCastExprToType(LHS.get(), ResTy, PrepareScalarCast(LHS, ResTy));
RHS = ImpCastExprToType(RHS.get(), ResTy, PrepareScalarCast(RHS, ResTy));
return ResTy;
}
// If both operands are the same structure or union type, the result is that
// type.
if (const RecordType *LHSRT = LHSTy->getAs<RecordType>()) { // C99 6.5.15p3
if (const RecordType *RHSRT = RHSTy->getAs<RecordType>())
if (LHSRT->getDecl() == RHSRT->getDecl())
// "If both the operands have structure or union type, the result has
// that type." This implies that CV qualifiers are dropped.
return Context.getCommonSugaredType(LHSTy.getUnqualifiedType(),
RHSTy.getUnqualifiedType());
// FIXME: Type of conditional expression must be complete in C mode.
}
// C99 6.5.15p5: "If both operands have void type, the result has void type."
// The following || allows only one side to be void (a GCC-ism).
if (LHSTy->isVoidType() || RHSTy->isVoidType()) {
QualType ResTy;
if (LHSTy->isVoidType() && RHSTy->isVoidType()) {
ResTy = Context.getCommonSugaredType(LHSTy, RHSTy);
} else if (RHSTy->isVoidType()) {
ResTy = RHSTy;
Diag(RHS.get()->getBeginLoc(), diag::ext_typecheck_cond_one_void)
<< RHS.get()->getSourceRange();
} else {
ResTy = LHSTy;
Diag(LHS.get()->getBeginLoc(), diag::ext_typecheck_cond_one_void)
<< LHS.get()->getSourceRange();
}
LHS = ImpCastExprToType(LHS.get(), ResTy, CK_ToVoid);
RHS = ImpCastExprToType(RHS.get(), ResTy, CK_ToVoid);
return ResTy;
}
// C23 6.5.15p7:
// ... if both the second and third operands have nullptr_t type, the
// result also has that type.
if (LHSTy->isNullPtrType() && Context.hasSameType(LHSTy, RHSTy))
return ResTy;
// C99 6.5.15p6 - "if one operand is a null pointer constant, the result has
// the type of the other operand."
if (!checkConditionalNullPointer(*this, RHS, LHSTy)) return LHSTy;
if (!checkConditionalNullPointer(*this, LHS, RHSTy)) return RHSTy;
// All objective-c pointer type analysis is done here.
QualType compositeType =
ObjC().FindCompositeObjCPointerType(LHS, RHS, QuestionLoc);
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
if (!compositeType.isNull())
return compositeType;
// Handle block pointer types.
if (LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType())
return checkConditionalBlockPointerCompatibility(*this, LHS, RHS,
QuestionLoc);
// Check constraints for C object pointers types (C99 6.5.15p3,6).
if (LHSTy->isPointerType() && RHSTy->isPointerType())
return checkConditionalObjectPointersCompatibility(*this, LHS, RHS,
QuestionLoc);
// GCC compatibility: soften pointer/integer mismatch. Note that
// null pointers have been filtered out by this point.
if (checkPointerIntegerMismatch(*this, LHS, RHS.get(), QuestionLoc,
/*IsIntFirstExpr=*/true))
return RHSTy;
if (checkPointerIntegerMismatch(*this, RHS, LHS.get(), QuestionLoc,
/*IsIntFirstExpr=*/false))
return LHSTy;
// Emit a better diagnostic if one of the expressions is a null pointer
// constant and the other is not a pointer type. In this case, the user most
// likely forgot to take the address of the other expression.
if (DiagnoseConditionalForNull(LHS.get(), RHS.get(), QuestionLoc))
return QualType();
// Finally, if the LHS and RHS types are canonically the same type, we can
// use the common sugared type.
if (Context.hasSameType(LHSTy, RHSTy))
return Context.getCommonSugaredType(LHSTy, RHSTy);
// Otherwise, the operands are not compatible.
Diag(QuestionLoc, diag::err_typecheck_cond_incompatible_operands)
<< LHSTy << RHSTy << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
/// SuggestParentheses - Emit a note with a fixit hint that wraps
/// ParenRange in parentheses.
static void SuggestParentheses(Sema &Self, SourceLocation Loc,
const PartialDiagnostic &Note,
SourceRange ParenRange) {
SourceLocation EndLoc = Self.getLocForEndOfToken(ParenRange.getEnd());
if (ParenRange.getBegin().isFileID() && ParenRange.getEnd().isFileID() &&
EndLoc.isValid()) {
Self.Diag(Loc, Note)
<< FixItHint::CreateInsertion(ParenRange.getBegin(), "(")
<< FixItHint::CreateInsertion(EndLoc, ")");
} else {
// We can't display the parentheses, so just show the bare note.
Self.Diag(Loc, Note) << ParenRange;
}
}
static bool IsArithmeticOp(BinaryOperatorKind Opc) {
return BinaryOperator::isAdditiveOp(Opc) ||
BinaryOperator::isMultiplicativeOp(Opc) ||
BinaryOperator::isShiftOp(Opc) || Opc == BO_And || Opc == BO_Or;
// This only checks for bitwise-or and bitwise-and, but not bitwise-xor and
// not any of the logical operators. Bitwise-xor is commonly used as a
// logical-xor because there is no logical-xor operator. The logical
// operators, including uses of xor, have a high false positive rate for
// precedence warnings.
}
/// IsArithmeticBinaryExpr - Returns true if E is an arithmetic binary
/// expression, either using a built-in or overloaded operator,
/// and sets *OpCode to the opcode and *RHSExprs to the right-hand side
/// expression.
static bool IsArithmeticBinaryExpr(const Expr *E, BinaryOperatorKind *Opcode,
const Expr **RHSExprs) {
  // Don't strip parentheses: we should not warn if E is in parentheses.
E = E->IgnoreImpCasts();
E = E->IgnoreConversionOperatorSingleStep();
E = E->IgnoreImpCasts();
if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(E)) {
E = MTE->getSubExpr();
E = E->IgnoreImpCasts();
}
// Built-in binary operator.
if (const auto *OP = dyn_cast<BinaryOperator>(E);
OP && IsArithmeticOp(OP->getOpcode())) {
*Opcode = OP->getOpcode();
*RHSExprs = OP->getRHS();
return true;
}
// Overloaded operator.
if (const auto *Call = dyn_cast<CXXOperatorCallExpr>(E)) {
if (Call->getNumArgs() != 2)
return false;
// Make sure this is really a binary operator that is safe to pass into
// BinaryOperator::getOverloadedOpcode(), e.g. it's not a subscript op.
OverloadedOperatorKind OO = Call->getOperator();
if (OO < OO_Plus || OO > OO_Arrow ||
OO == OO_PlusPlus || OO == OO_MinusMinus)
return false;
BinaryOperatorKind OpKind = BinaryOperator::getOverloadedOpcode(OO);
if (IsArithmeticOp(OpKind)) {
*Opcode = OpKind;
*RHSExprs = Call->getArg(1);
return true;
}
}
return false;
}
/// ExprLooksBoolean - Returns true if E looks boolean, i.e. it has boolean type
/// or is a logical expression such as (x==y) which has int type, but is
/// commonly interpreted as boolean.
static bool ExprLooksBoolean(const Expr *E) {
E = E->IgnoreParenImpCasts();
if (E->getType()->isBooleanType())
return true;
if (const auto *OP = dyn_cast<BinaryOperator>(E))
return OP->isComparisonOp() || OP->isLogicalOp();
if (const auto *OP = dyn_cast<UnaryOperator>(E))
return OP->getOpcode() == UO_LNot;
if (E->getType()->isPointerType())
return true;
// FIXME: What about overloaded operator calls returning "unspecified boolean
// type"s (commonly pointer-to-members)?
return false;
}
/// DiagnoseConditionalPrecedence - Emit a warning when a conditional operator
/// and binary operator are mixed in a way that suggests the programmer assumed
/// the conditional operator has higher precedence, for example:
/// "int x = a + someBinaryCondition ? 1 : 2".
static void DiagnoseConditionalPrecedence(Sema &Self, SourceLocation OpLoc,
Expr *Condition, const Expr *LHSExpr,
const Expr *RHSExpr) {
BinaryOperatorKind CondOpcode;
const Expr *CondRHS;
if (!IsArithmeticBinaryExpr(Condition, &CondOpcode, &CondRHS))
return;
if (!ExprLooksBoolean(CondRHS))
return;
// The condition is an arithmetic binary expression, with a right-
// hand side that looks boolean, so warn.
unsigned DiagID = BinaryOperator::isBitwiseOp(CondOpcode)
? diag::warn_precedence_bitwise_conditional
: diag::warn_precedence_conditional;
Self.Diag(OpLoc, DiagID)
<< Condition->getSourceRange()
<< BinaryOperator::getOpcodeStr(CondOpcode);
SuggestParentheses(
Self, OpLoc,
Self.PDiag(diag::note_precedence_silence)
<< BinaryOperator::getOpcodeStr(CondOpcode),
SourceRange(Condition->getBeginLoc(), Condition->getEndLoc()));
SuggestParentheses(Self, OpLoc,
Self.PDiag(diag::note_precedence_conditional_first),
SourceRange(CondRHS->getBeginLoc(), RHSExpr->getEndLoc()));
}
/// Compute the nullability of a conditional expression.
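/// For example (illustrative): given "int *_Nonnull p; int *_Nullable q;",
/// "c ? p : q" is _Nullable since either branch may be chosen, while
/// "p ?: q" is _Nonnull: a non-null LHS is returned as-is, and the RHS is
/// only reached when the LHS is null.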
static QualType computeConditionalNullability(QualType ResTy, bool IsBin,
QualType LHSTy, QualType RHSTy,
ASTContext &Ctx) {
if (!ResTy->isAnyPointerType())
return ResTy;
auto GetNullability = [](QualType Ty) {
std::optional<NullabilityKind> Kind = Ty->getNullability();
if (Kind) {
// For our purposes, treat _Nullable_result as _Nullable.
if (*Kind == NullabilityKind::NullableResult)
return NullabilityKind::Nullable;
return *Kind;
}
return NullabilityKind::Unspecified;
};
auto LHSKind = GetNullability(LHSTy), RHSKind = GetNullability(RHSTy);
NullabilityKind MergedKind;
// Compute nullability of a binary conditional expression.
if (IsBin) {
if (LHSKind == NullabilityKind::NonNull)
MergedKind = NullabilityKind::NonNull;
else
MergedKind = RHSKind;
// Compute nullability of a normal conditional expression.
} else {
if (LHSKind == NullabilityKind::Nullable ||
RHSKind == NullabilityKind::Nullable)
MergedKind = NullabilityKind::Nullable;
else if (LHSKind == NullabilityKind::NonNull)
MergedKind = RHSKind;
else if (RHSKind == NullabilityKind::NonNull)
MergedKind = LHSKind;
else
MergedKind = NullabilityKind::Unspecified;
}
// Return if ResTy already has the correct nullability.
if (GetNullability(ResTy) == MergedKind)
return ResTy;
// Strip all nullability from ResTy.
while (ResTy->getNullability())
ResTy = ResTy.getSingleStepDesugaredType(Ctx);
// Create a new AttributedType with the new nullability kind.
auto NewAttr = AttributedType::getNullabilityAttrKind(MergedKind);
return Ctx.getAttributedType(NewAttr, ResTy, ResTy);
}
ExprResult Sema::ActOnConditionalOp(SourceLocation QuestionLoc,
SourceLocation ColonLoc,
Expr *CondExpr, Expr *LHSExpr,
Expr *RHSExpr) {
if (!Context.isDependenceAllowed()) {
// C cannot handle TypoExpr nodes in the condition because it
// doesn't handle dependent types properly, so make sure any TypoExprs have
// been dealt with before checking the operands.
ExprResult CondResult = CorrectDelayedTyposInExpr(CondExpr);
ExprResult LHSResult = CorrectDelayedTyposInExpr(LHSExpr);
ExprResult RHSResult = CorrectDelayedTyposInExpr(RHSExpr);
if (!CondResult.isUsable())
return ExprError();
if (LHSExpr) {
if (!LHSResult.isUsable())
return ExprError();
}
if (!RHSResult.isUsable())
return ExprError();
CondExpr = CondResult.get();
LHSExpr = LHSResult.get();
RHSExpr = RHSResult.get();
}
// If this is the gnu "x ?: y" extension, analyze the types as though the LHS
// was the condition.
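  // For example (illustrative): "f() ?: g()" is analyzed roughly like
  // "tmp = f(); tmp ? tmp : g()", except that the OpaqueValueExpr built
  // below stands in for 'tmp', so f() is type-checked once yet referenced
  // from both the condition and the true branch.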
OpaqueValueExpr *opaqueValue = nullptr;
Expr *commonExpr = nullptr;
if (!LHSExpr) {
commonExpr = CondExpr;
// Lower out placeholder types first. This is important so that we don't
    // try to capture a placeholder. This happens in a few cases in C++, such
    // as Objective-C++'s dictionary subscripting syntax.
if (commonExpr->hasPlaceholderType()) {
ExprResult result = CheckPlaceholderExpr(commonExpr);
if (!result.isUsable()) return ExprError();
commonExpr = result.get();
}
// We usually want to apply unary conversions *before* saving, except
// in the special case of a C++ l-value conditional.
if (!(getLangOpts().CPlusPlus
&& !commonExpr->isTypeDependent()
&& commonExpr->getValueKind() == RHSExpr->getValueKind()
&& commonExpr->isGLValue()
&& commonExpr->isOrdinaryOrBitFieldObject()
&& RHSExpr->isOrdinaryOrBitFieldObject()
&& Context.hasSameType(commonExpr->getType(), RHSExpr->getType()))) {
ExprResult commonRes = UsualUnaryConversions(commonExpr);
if (commonRes.isInvalid())
return ExprError();
commonExpr = commonRes.get();
}
// If the common expression is a class or array prvalue, materialize it
// so that we can safely refer to it multiple times.
if (commonExpr->isPRValue() && (commonExpr->getType()->isRecordType() ||
commonExpr->getType()->isArrayType())) {
ExprResult MatExpr = TemporaryMaterializationConversion(commonExpr);
if (MatExpr.isInvalid())
return ExprError();
commonExpr = MatExpr.get();
}
opaqueValue = new (Context) OpaqueValueExpr(commonExpr->getExprLoc(),
commonExpr->getType(),
commonExpr->getValueKind(),
commonExpr->getObjectKind(),
commonExpr);
LHSExpr = CondExpr = opaqueValue;
}
QualType LHSTy = LHSExpr->getType(), RHSTy = RHSExpr->getType();
ExprValueKind VK = VK_PRValue;
ExprObjectKind OK = OK_Ordinary;
ExprResult Cond = CondExpr, LHS = LHSExpr, RHS = RHSExpr;
QualType result = CheckConditionalOperands(Cond, LHS, RHS,
VK, OK, QuestionLoc);
if (result.isNull() || Cond.isInvalid() || LHS.isInvalid() ||
RHS.isInvalid())
return ExprError();
DiagnoseConditionalPrecedence(*this, QuestionLoc, Cond.get(), LHS.get(),
RHS.get());
CheckBoolLikeConversion(Cond.get(), QuestionLoc);
result = computeConditionalNullability(result, commonExpr, LHSTy, RHSTy,
Context);
if (!commonExpr)
return new (Context)
ConditionalOperator(Cond.get(), QuestionLoc, LHS.get(), ColonLoc,
RHS.get(), result, VK, OK);
return new (Context) BinaryConditionalOperator(
commonExpr, opaqueValue, Cond.get(), LHS.get(), RHS.get(), QuestionLoc,
ColonLoc, result, VK, OK);
}
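// Check whether a conversion between function types would change the AArch64
// SME attributes (e.g. __arm_streaming); a mismatch makes the conversion
// invalid, as these attributes affect how the function must be called.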
bool Sema::IsInvalidSMECallConversion(QualType FromType, QualType ToType) {
unsigned FromAttributes = 0, ToAttributes = 0;
if (const auto *FromFn =
dyn_cast<FunctionProtoType>(Context.getCanonicalType(FromType)))
FromAttributes =
FromFn->getAArch64SMEAttributes() & FunctionType::SME_AttributeMask;
if (const auto *ToFn =
dyn_cast<FunctionProtoType>(Context.getCanonicalType(ToType)))
ToAttributes =
ToFn->getAArch64SMEAttributes() & FunctionType::SME_AttributeMask;
return FromAttributes != ToAttributes;
}
// Check if we have a conversion between incompatible cmse function pointer
// types, that is, a conversion between a function pointer with the
// cmse_nonsecure_call attribute and one without.
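// For example (illustrative):
//   typedef void (*ns_fn)(int) __attribute__((cmse_nonsecure_call));
//   void (*plain_fn)(int);
//   ns_fn f = plain_fn; // mismatch: only one side is cmse_nonsecure_call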
static bool IsInvalidCmseNSCallConversion(Sema &S, QualType FromType,
QualType ToType) {
if (const auto *ToFn =
dyn_cast<FunctionType>(S.Context.getCanonicalType(ToType))) {
if (const auto *FromFn =
dyn_cast<FunctionType>(S.Context.getCanonicalType(FromType))) {
FunctionType::ExtInfo ToEInfo = ToFn->getExtInfo();
FunctionType::ExtInfo FromEInfo = FromFn->getExtInfo();
return ToEInfo.getCmseNSCall() != FromEInfo.getCmseNSCall();
}
}
return false;
}
// checkPointerTypesForAssignment - This is a very tricky routine (despite
// being closely modeled after the C99 spec:-). The odd characteristic of this
// routine is that it effectively ignores the qualifiers on the top level
// pointee. This circumvents the usual type rules specified in 6.2.7p1 &
// 6.7.5.[1-3].
// FIXME: add a couple examples in this comment.
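// For example (illustrative):
//   char *p; const char *cp;
//   cp = p; // OK: the LHS pointee may add qualifiers (C99 6.5.16.1p1)
//   p = cp; // warns: the assignment discards 'const' from the pointee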
static Sema::AssignConvertType
checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType,
SourceLocation Loc) {
assert(LHSType.isCanonical() && "LHS not canonicalized!");
assert(RHSType.isCanonical() && "RHS not canonicalized!");
// get the "pointed to" type (ignoring qualifiers at the top level)
const Type *lhptee, *rhptee;
Qualifiers lhq, rhq;
std::tie(lhptee, lhq) =
cast<PointerType>(LHSType)->getPointeeType().split().asPair();
std::tie(rhptee, rhq) =
cast<PointerType>(RHSType)->getPointeeType().split().asPair();
Sema::AssignConvertType ConvTy = Sema::Compatible;
  // C99 6.5.16.1p1: The following citation is common to constraints
// 3 & 4 (below). ...and the type *pointed to* by the left has all the
// qualifiers of the type *pointed to* by the right;
// As a special case, 'non-__weak A *' -> 'non-__weak const *' is okay.
if (lhq.getObjCLifetime() != rhq.getObjCLifetime() &&
lhq.compatiblyIncludesObjCLifetime(rhq)) {
// Ignore lifetime for further calculation.
lhq.removeObjCLifetime();
rhq.removeObjCLifetime();
}
if (!lhq.compatiblyIncludes(rhq)) {
// Treat address-space mismatches as fatal.
if (!lhq.isAddressSpaceSupersetOf(rhq))
return Sema::IncompatiblePointerDiscardsQualifiers;
// It's okay to add or remove GC or lifetime qualifiers when converting to
// and from void*.
else if (lhq.withoutObjCGCAttr().withoutObjCLifetime()
.compatiblyIncludes(
rhq.withoutObjCGCAttr().withoutObjCLifetime())
&& (lhptee->isVoidType() || rhptee->isVoidType()))
; // keep old
// Treat lifetime mismatches as fatal.
else if (lhq.getObjCLifetime() != rhq.getObjCLifetime())
ConvTy = Sema::IncompatiblePointerDiscardsQualifiers;
// For GCC/MS compatibility, other qualifier mismatches are treated
// as still compatible in C.
else ConvTy = Sema::CompatiblePointerDiscardsQualifiers;
}
// C99 6.5.16.1p1 (constraint 4): If one operand is a pointer to an object or
// incomplete type and the other is a pointer to a qualified or unqualified
// version of void...
if (lhptee->isVoidType()) {
if (rhptee->isIncompleteOrObjectType())
return ConvTy;
// As an extension, we allow cast to/from void* to function pointer.
assert(rhptee->isFunctionType());
return Sema::FunctionVoidPointer;
}
if (rhptee->isVoidType()) {
if (lhptee->isIncompleteOrObjectType())
return ConvTy;
// As an extension, we allow cast to/from void* to function pointer.
assert(lhptee->isFunctionType());
return Sema::FunctionVoidPointer;
}
if (!S.Diags.isIgnored(
diag::warn_typecheck_convert_incompatible_function_pointer_strict,
Loc) &&
RHSType->isFunctionPointerType() && LHSType->isFunctionPointerType() &&
!S.IsFunctionConversion(RHSType, LHSType, RHSType))
return Sema::IncompatibleFunctionPointerStrict;
// C99 6.5.16.1p1 (constraint 3): both operands are pointers to qualified or
// unqualified versions of compatible types, ...
QualType ltrans = QualType(lhptee, 0), rtrans = QualType(rhptee, 0);
if (!S.Context.typesAreCompatible(ltrans, rtrans)) {
// Check if the pointee types are compatible ignoring the sign.
// We explicitly check for char so that we catch "char" vs
// "unsigned char" on systems where "char" is unsigned.
if (lhptee->isCharType())
ltrans = S.Context.UnsignedCharTy;
else if (lhptee->hasSignedIntegerRepresentation())
ltrans = S.Context.getCorrespondingUnsignedType(ltrans);
if (rhptee->isCharType())
rtrans = S.Context.UnsignedCharTy;
else if (rhptee->hasSignedIntegerRepresentation())
rtrans = S.Context.getCorrespondingUnsignedType(rtrans);
if (ltrans == rtrans) {
// Types are compatible ignoring the sign. Qualifier incompatibility
// takes priority over sign incompatibility because the sign
// warning can be disabled.
if (ConvTy != Sema::Compatible)
return ConvTy;
return Sema::IncompatiblePointerSign;
}
// If we are a multi-level pointer, it's possible that our issue is simply
// one of qualification - e.g. char ** -> const char ** is not allowed. If
// the eventual target type is the same and the pointers have the same
// level of indirection, this must be the issue.
if (isa<PointerType>(lhptee) && isa<PointerType>(rhptee)) {
do {
std::tie(lhptee, lhq) =
cast<PointerType>(lhptee)->getPointeeType().split().asPair();
std::tie(rhptee, rhq) =
cast<PointerType>(rhptee)->getPointeeType().split().asPair();
// Inconsistent address spaces at this point is invalid, even if the
// address spaces would be compatible.
// FIXME: This doesn't catch address space mismatches for pointers of
// different nesting levels, like:
// __local int *** a;
// int ** b = a;
// It's not clear how to actually determine when such pointers are
// invalidly incompatible.
if (lhq.getAddressSpace() != rhq.getAddressSpace())
return Sema::IncompatibleNestedPointerAddressSpaceMismatch;
} while (isa<PointerType>(lhptee) && isa<PointerType>(rhptee));
if (lhptee == rhptee)
return Sema::IncompatibleNestedPointerQualifiers;
}
// General pointer incompatibility takes priority over qualifiers.
if (RHSType->isFunctionPointerType() && LHSType->isFunctionPointerType())
return Sema::IncompatibleFunctionPointer;
return Sema::IncompatiblePointer;
}
if (!S.getLangOpts().CPlusPlus &&
S.IsFunctionConversion(ltrans, rtrans, ltrans))
return Sema::IncompatibleFunctionPointer;
if (IsInvalidCmseNSCallConversion(S, ltrans, rtrans))
return Sema::IncompatibleFunctionPointer;
if (S.IsInvalidSMECallConversion(rtrans, ltrans))
return Sema::IncompatibleFunctionPointer;
return ConvTy;
}
/// checkBlockPointerTypesForAssignment - This routine determines whether two
/// block pointer types are compatible or whether a block and normal pointer
/// are compatible. It is more restrictive than comparing two function pointer
/// types.
static Sema::AssignConvertType
checkBlockPointerTypesForAssignment(Sema &S, QualType LHSType,
QualType RHSType) {
assert(LHSType.isCanonical() && "LHS not canonicalized!");
assert(RHSType.isCanonical() && "RHS not canonicalized!");
QualType lhptee, rhptee;
// get the "pointed to" type (ignoring qualifiers at the top level)
lhptee = cast<BlockPointerType>(LHSType)->getPointeeType();
rhptee = cast<BlockPointerType>(RHSType)->getPointeeType();
// In C++, the types have to match exactly.
if (S.getLangOpts().CPlusPlus)
return Sema::IncompatibleBlockPointer;
Sema::AssignConvertType ConvTy = Sema::Compatible;
// For blocks we enforce that qualifiers are identical.
Qualifiers LQuals = lhptee.getLocalQualifiers();
Qualifiers RQuals = rhptee.getLocalQualifiers();
if (S.getLangOpts().OpenCL) {
LQuals.removeAddressSpace();
RQuals.removeAddressSpace();
}
if (LQuals != RQuals)
ConvTy = Sema::CompatiblePointerDiscardsQualifiers;
// FIXME: OpenCL doesn't define the exact compile time semantics for a block
// assignment.
// The current behavior is similar to C++ lambdas. A block might be
// assigned to a variable iff its return type and parameters are compatible
// (C99 6.2.7) with the corresponding return type and parameters of the LHS of
// an assignment. Presumably it should behave the way a function pointer
// assignment does in C, so for each parameter and return type:
// * CVR and address space of LHS should be a superset of CVR and address
// space of RHS.
// * unqualified types should be compatible.
if (S.getLangOpts().OpenCL) {
if (!S.Context.typesAreBlockPointerCompatible(
S.Context.getQualifiedType(LHSType.getUnqualifiedType(), LQuals),
S.Context.getQualifiedType(RHSType.getUnqualifiedType(), RQuals)))
return Sema::IncompatibleBlockPointer;
} else if (!S.Context.typesAreBlockPointerCompatible(LHSType, RHSType))
return Sema::IncompatibleBlockPointer;
return ConvTy;
}
/// checkObjCPointerTypesForAssignment - Compares two objective-c pointer types
/// for assignment compatibility.
static Sema::AssignConvertType
checkObjCPointerTypesForAssignment(Sema &S, QualType LHSType,
QualType RHSType) {
assert(LHSType.isCanonical() && "LHS was not canonicalized!");
assert(RHSType.isCanonical() && "RHS was not canonicalized!");
if (LHSType->isObjCBuiltinType()) {
// Class is not compatible with ObjC object pointers.
if (LHSType->isObjCClassType() && !RHSType->isObjCBuiltinType() &&
!RHSType->isObjCQualifiedClassType())
return Sema::IncompatiblePointer;
return Sema::Compatible;
}
if (RHSType->isObjCBuiltinType()) {
if (RHSType->isObjCClassType() && !LHSType->isObjCBuiltinType() &&
!LHSType->isObjCQualifiedClassType())
return Sema::IncompatiblePointer;
return Sema::Compatible;
}
QualType lhptee = LHSType->castAs<ObjCObjectPointerType>()->getPointeeType();
QualType rhptee = RHSType->castAs<ObjCObjectPointerType>()->getPointeeType();
if (!lhptee.isAtLeastAsQualifiedAs(rhptee) &&
// make an exception for id<P>
!LHSType->isObjCQualifiedIdType())
return Sema::CompatiblePointerDiscardsQualifiers;
if (S.Context.typesAreCompatible(LHSType, RHSType))
return Sema::Compatible;
if (LHSType->isObjCQualifiedIdType() || RHSType->isObjCQualifiedIdType())
return Sema::IncompatibleObjCQualifiedId;
return Sema::IncompatiblePointer;
}
Sema::AssignConvertType
Sema::CheckAssignmentConstraints(SourceLocation Loc,
QualType LHSType, QualType RHSType) {
// Fake up an opaque expression. We don't actually care about what
// cast operations are required, so if CheckAssignmentConstraints
// adds casts to this they'll be wasted, but fortunately that doesn't
// usually happen on valid code.
OpaqueValueExpr RHSExpr(Loc, RHSType, VK_PRValue);
ExprResult RHSPtr = &RHSExpr;
CastKind K;
return CheckAssignmentConstraints(LHSType, RHSPtr, K, /*ConvertRHS=*/false);
}
/// This helper function returns true if QT is a vector type that has element
/// type ElementType.
static bool isVector(QualType QT, QualType ElementType) {
if (const VectorType *VT = QT->getAs<VectorType>())
return VT->getElementType().getCanonicalType() == ElementType;
return false;
}
/// CheckAssignmentConstraints (C99 6.5.16) - This routine currently
/// has code to accommodate several GCC extensions when type checking
/// pointers. Here are some objectionable examples that GCC considers warnings:
///
/// int a, *pint;
/// short *pshort;
/// struct foo *pfoo;
///
/// pint = pshort; // warning: assignment from incompatible pointer type
/// a = pint; // warning: assignment makes integer from pointer without a cast
/// pint = a; // warning: assignment makes pointer from integer without a cast
/// pint = pfoo; // warning: assignment from incompatible pointer type
///
/// As a result, the code for dealing with pointers is more complex than the
/// C99 spec dictates.
///
/// Sets 'Kind' for any result kind except Incompatible.
Sema::AssignConvertType
Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS,
CastKind &Kind, bool ConvertRHS) {
QualType RHSType = RHS.get()->getType();
QualType OrigLHSType = LHSType;
// Get canonical types. We're not formatting these types, just comparing
// them.
LHSType = Context.getCanonicalType(LHSType).getUnqualifiedType();
RHSType = Context.getCanonicalType(RHSType).getUnqualifiedType();
// Common case: no conversion required.
if (LHSType == RHSType) {
Kind = CK_NoOp;
return Compatible;
}
// If the LHS has an __auto_type, there are no additional type constraints
// to be worried about.
if (const auto *AT = dyn_cast<AutoType>(LHSType)) {
if (AT->isGNUAutoType()) {
Kind = CK_NoOp;
return Compatible;
}
}
// If we have an atomic type, try a non-atomic assignment, then just add an
// atomic qualification step.
if (const AtomicType *AtomicTy = dyn_cast<AtomicType>(LHSType)) {
Sema::AssignConvertType result =
CheckAssignmentConstraints(AtomicTy->getValueType(), RHS, Kind);
if (result != Compatible)
return result;
if (Kind != CK_NoOp && ConvertRHS)
RHS = ImpCastExprToType(RHS.get(), AtomicTy->getValueType(), Kind);
Kind = CK_NonAtomicToAtomic;
return Compatible;
}
// If the left-hand side is a reference type, then we are in a
// (rare!) case where we've allowed the use of references in C,
// e.g., as a parameter type in a built-in function. In this case,
// just make sure that the type referenced is compatible with the
// right-hand side type. The caller is responsible for adjusting
// LHSType so that the resulting expression does not have reference
// type.
if (const ReferenceType *LHSTypeRef = LHSType->getAs<ReferenceType>()) {
if (Context.typesAreCompatible(LHSTypeRef->getPointeeType(), RHSType)) {
Kind = CK_LValueBitCast;
return Compatible;
}
return Incompatible;
}
// Allow scalar to ExtVector assignments, and assignments of an ExtVector type
// to the same ExtVector type.
if (LHSType->isExtVectorType()) {
if (RHSType->isExtVectorType())
return Incompatible;
if (RHSType->isArithmeticType()) {
// CK_VectorSplat does T -> vector T, so first cast to the element type.
if (ConvertRHS)
RHS = prepareVectorSplat(LHSType, RHS.get());
Kind = CK_VectorSplat;
return Compatible;
}
}
// Conversions to or from vector type.
if (LHSType->isVectorType() || RHSType->isVectorType()) {
if (LHSType->isVectorType() && RHSType->isVectorType()) {
// Allow assignments of an AltiVec vector type to an equivalent GCC
// vector type and vice versa
if (Context.areCompatibleVectorTypes(LHSType, RHSType)) {
Kind = CK_BitCast;
return Compatible;
}
// If we are allowing lax vector conversions, and LHS and RHS are both
// vectors, the total size only needs to be the same. This is a bitcast;
// no bits are changed but the result type is different.
if (isLaxVectorConversion(RHSType, LHSType)) {
// The default for lax vector conversions with Altivec vectors will
// change, so if we are converting between vector types where
// at least one is an Altivec vector, emit a warning.
if (Context.getTargetInfo().getTriple().isPPC() &&
anyAltivecTypes(RHSType, LHSType) &&
!Context.areCompatibleVectorTypes(RHSType, LHSType))
Diag(RHS.get()->getExprLoc(), diag::warn_deprecated_lax_vec_conv_all)
<< RHSType << LHSType;
Kind = CK_BitCast;
return IncompatibleVectors;
}
}
// When the RHS comes from another lax conversion (e.g. binops between
// scalars and vectors) the result is canonicalized as a vector. When the
    // LHS is also a vector, the lax conversion is allowed by the condition
    // above. Handle the case where the LHS is a scalar.
if (LHSType->isScalarType()) {
const VectorType *VecType = RHSType->getAs<VectorType>();
if (VecType && VecType->getNumElements() == 1 &&
isLaxVectorConversion(RHSType, LHSType)) {
if (Context.getTargetInfo().getTriple().isPPC() &&
(VecType->getVectorKind() == VectorKind::AltiVecVector ||
VecType->getVectorKind() == VectorKind::AltiVecBool ||
VecType->getVectorKind() == VectorKind::AltiVecPixel))
Diag(RHS.get()->getExprLoc(), diag::warn_deprecated_lax_vec_conv_all)
<< RHSType << LHSType;
ExprResult *VecExpr = &RHS;
*VecExpr = ImpCastExprToType(VecExpr->get(), LHSType, CK_BitCast);
Kind = CK_BitCast;
return Compatible;
}
}
// Allow assignments between fixed-length and sizeless SVE vectors.
if ((LHSType->isSVESizelessBuiltinType() && RHSType->isVectorType()) ||
(LHSType->isVectorType() && RHSType->isSVESizelessBuiltinType()))
if (Context.areCompatibleSveTypes(LHSType, RHSType) ||
Context.areLaxCompatibleSveTypes(LHSType, RHSType)) {
Kind = CK_BitCast;
return Compatible;
}
// Allow assignments between fixed-length and sizeless RVV vectors.
if ((LHSType->isRVVSizelessBuiltinType() && RHSType->isVectorType()) ||
(LHSType->isVectorType() && RHSType->isRVVSizelessBuiltinType())) {
if (Context.areCompatibleRVVTypes(LHSType, RHSType) ||
Context.areLaxCompatibleRVVTypes(LHSType, RHSType)) {
Kind = CK_BitCast;
return Compatible;
}
}
return Incompatible;
}
// Diagnose attempts to convert between __ibm128, __float128 and long double
// where such conversions currently can't be handled.
if (unsupportedTypeConversion(*this, LHSType, RHSType))
return Incompatible;
// Disallow assigning a _Complex to a real type in C++ mode since it simply
// discards the imaginary part.
if (getLangOpts().CPlusPlus && RHSType->getAs<ComplexType>() &&
!LHSType->getAs<ComplexType>())
return Incompatible;
// Arithmetic conversions.
if (LHSType->isArithmeticType() && RHSType->isArithmeticType() &&
!(getLangOpts().CPlusPlus && LHSType->isEnumeralType())) {
if (ConvertRHS)
Kind = PrepareScalarCast(RHS, LHSType);
return Compatible;
}
// Conversions to normal pointers.
if (const PointerType *LHSPointer = dyn_cast<PointerType>(LHSType)) {
// U* -> T*
if (isa<PointerType>(RHSType)) {
LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace();
LangAS AddrSpaceR = RHSType->getPointeeType().getAddressSpace();
if (AddrSpaceL != AddrSpaceR)
Kind = CK_AddressSpaceConversion;
else if (Context.hasCvrSimilarType(RHSType, LHSType))
Kind = CK_NoOp;
else
Kind = CK_BitCast;
return checkPointerTypesForAssignment(*this, LHSType, RHSType,
RHS.get()->getBeginLoc());
}
// int -> T*
if (RHSType->isIntegerType()) {
Kind = CK_IntegralToPointer; // FIXME: null?
return IntToPointer;
}
// C pointers are not compatible with ObjC object pointers,
// with two exceptions:
if (isa<ObjCObjectPointerType>(RHSType)) {
// - conversions to void*
if (LHSPointer->getPointeeType()->isVoidType()) {
Kind = CK_BitCast;
return Compatible;
}
// - conversions from 'Class' to the redefinition type
if (RHSType->isObjCClassType() &&
Context.hasSameType(LHSType,
Context.getObjCClassRedefinitionType())) {
Kind = CK_BitCast;
return Compatible;
}
Kind = CK_BitCast;
return IncompatiblePointer;
}
// U^ -> void*
if (RHSType->getAs<BlockPointerType>()) {
if (LHSPointer->getPointeeType()->isVoidType()) {
LangAS AddrSpaceL = LHSPointer->getPointeeType().getAddressSpace();
LangAS AddrSpaceR = RHSType->getAs<BlockPointerType>()
->getPointeeType()
.getAddressSpace();
Kind =
AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast;
return Compatible;
}
}
return Incompatible;
}
// Conversions to block pointers.
if (isa<BlockPointerType>(LHSType)) {
// U^ -> T^
if (RHSType->isBlockPointerType()) {
LangAS AddrSpaceL = LHSType->getAs<BlockPointerType>()
->getPointeeType()
.getAddressSpace();
LangAS AddrSpaceR = RHSType->getAs<BlockPointerType>()
->getPointeeType()
.getAddressSpace();
Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast;
return checkBlockPointerTypesForAssignment(*this, LHSType, RHSType);
}
// int or null -> T^
if (RHSType->isIntegerType()) {
Kind = CK_IntegralToPointer; // FIXME: null
return IntToBlockPointer;
}
// id -> T^
if (getLangOpts().ObjC && RHSType->isObjCIdType()) {
Kind = CK_AnyPointerToBlockPointerCast;
return Compatible;
}
// void* -> T^
if (const PointerType *RHSPT = RHSType->getAs<PointerType>())
if (RHSPT->getPointeeType()->isVoidType()) {
Kind = CK_AnyPointerToBlockPointerCast;
return Compatible;
}
return Incompatible;
}
// Conversions to Objective-C pointers.
if (isa<ObjCObjectPointerType>(LHSType)) {
// A* -> B*
if (RHSType->isObjCObjectPointerType()) {
Kind = CK_BitCast;
Sema::AssignConvertType result =
checkObjCPointerTypesForAssignment(*this, LHSType, RHSType);
if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() &&
result == Compatible &&
!ObjC().CheckObjCARCUnavailableWeakConversion(OrigLHSType, RHSType))
result = IncompatibleObjCWeakRef;
return result;
}
// int or null -> A*
if (RHSType->isIntegerType()) {
Kind = CK_IntegralToPointer; // FIXME: null
return IntToPointer;
}
// In general, C pointers are not compatible with ObjC object pointers,
// with two exceptions:
if (isa<PointerType>(RHSType)) {
Kind = CK_CPointerToObjCPointerCast;
// - conversions from 'void*'
if (RHSType->isVoidPointerType()) {
return Compatible;
}
// - conversions to 'Class' from its redefinition type
if (LHSType->isObjCClassType() &&
Context.hasSameType(RHSType,
Context.getObjCClassRedefinitionType())) {
return Compatible;
}
return IncompatiblePointer;
}
    // Only under a strict condition is T^ compatible with an Objective-C
    // pointer.
if (RHSType->isBlockPointerType() &&
LHSType->isBlockCompatibleObjCPointerType(Context)) {
if (ConvertRHS)
maybeExtendBlockObject(RHS);
Kind = CK_BlockPointerToObjCPointerCast;
return Compatible;
}
return Incompatible;
}
// Conversion to nullptr_t (C23 only)
if (getLangOpts().C23 && LHSType->isNullPtrType() &&
RHS.get()->isNullPointerConstant(Context,
Expr::NPC_ValueDependentIsNull)) {
// null -> nullptr_t
Kind = CK_NullToPointer;
return Compatible;
}
// Conversions from pointers that are not covered by the above.
if (isa<PointerType>(RHSType)) {
// T* -> _Bool
if (LHSType == Context.BoolTy) {
Kind = CK_PointerToBoolean;
return Compatible;
}
// T* -> int
if (LHSType->isIntegerType()) {
Kind = CK_PointerToIntegral;
return PointerToInt;
}
return Incompatible;
}
// Conversions from Objective-C pointers that are not covered by the above.
if (isa<ObjCObjectPointerType>(RHSType)) {
// T* -> _Bool
if (LHSType == Context.BoolTy) {
Kind = CK_PointerToBoolean;
return Compatible;
}
// T* -> int
if (LHSType->isIntegerType()) {
Kind = CK_PointerToIntegral;
return PointerToInt;
}
return Incompatible;
}
// struct A -> struct B
if (isa<TagType>(LHSType) && isa<TagType>(RHSType)) {
if (Context.typesAreCompatible(LHSType, RHSType)) {
Kind = CK_NoOp;
return Compatible;
}
}
if (LHSType->isSamplerT() && RHSType->isIntegerType()) {
Kind = CK_IntToOCLSampler;
return Compatible;
}
return Incompatible;
}
/// Constructs a transparent union from an expression that is
/// used to initialize the transparent union.
static void ConstructTransparentUnion(Sema &S, ASTContext &C,
ExprResult &EResult, QualType UnionType,
FieldDecl *Field) {
// Build an initializer list that designates the appropriate member
// of the transparent union.
Expr *E = EResult.get();
InitListExpr *Initializer = new (C) InitListExpr(C, SourceLocation(),
E, SourceLocation());
Initializer->setType(UnionType);
Initializer->setInitializedFieldInUnion(Field);
// Build a compound literal constructing a value of the transparent
// union type from this initializer list.
TypeSourceInfo *unionTInfo = C.getTrivialTypeSourceInfo(UnionType);
EResult = new (C) CompoundLiteralExpr(SourceLocation(), unionTInfo, UnionType,
VK_PRValue, Initializer, false);
}
Sema::AssignConvertType
Sema::CheckTransparentUnionArgumentConstraints(QualType ArgType,
ExprResult &RHS) {
QualType RHSType = RHS.get()->getType();
// If the ArgType is a Union type, we want to handle a potential
// transparent_union GCC extension.
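  // For example (illustrative):
  //   typedef union {
  //     int *ip;
  //     float *fp;
  //   } UArg __attribute__((transparent_union));
  //   void g(UArg);
  //   g(&i); // for an 'int i', this initializes the 'ip' member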
const RecordType *UT = ArgType->getAsUnionType();
if (!UT || !UT->getDecl()->hasAttr<TransparentUnionAttr>())
return Incompatible;
// The field to initialize within the transparent union.
RecordDecl *UD = UT->getDecl();
FieldDecl *InitField = nullptr;
// It's compatible if the expression matches any of the fields.
for (auto *it : UD->fields()) {
if (it->getType()->isPointerType()) {
// If the transparent union contains a pointer type, we allow:
// 1) void pointer
// 2) null pointer constant
if (RHSType->isPointerType())
if (RHSType->castAs<PointerType>()->getPointeeType()->isVoidType()) {
RHS = ImpCastExprToType(RHS.get(), it->getType(), CK_BitCast);
InitField = it;
break;
}
if (RHS.get()->isNullPointerConstant(Context,
Expr::NPC_ValueDependentIsNull)) {
RHS = ImpCastExprToType(RHS.get(), it->getType(),
CK_NullToPointer);
InitField = it;
break;
}
}
CastKind Kind;
if (CheckAssignmentConstraints(it->getType(), RHS, Kind)
== Compatible) {
RHS = ImpCastExprToType(RHS.get(), it->getType(), Kind);
InitField = it;
break;
}
}
if (!InitField)
return Incompatible;
ConstructTransparentUnion(*this, Context, RHS, ArgType, InitField);
return Compatible;
}
Sema::AssignConvertType
Sema::CheckSingleAssignmentConstraints(QualType LHSType, ExprResult &CallerRHS,
bool Diagnose,
bool DiagnoseCFAudited,
bool ConvertRHS) {
// We need to be able to tell the caller whether we diagnosed a problem, if
// they ask us to issue diagnostics.
assert((ConvertRHS || !Diagnose) && "can't indicate whether we diagnosed");
// If ConvertRHS is false, we want to leave the caller's RHS untouched. Sadly,
  // we can't avoid *all* modifications at the moment, so we need somewhere
  // to put the updated value.
ExprResult LocalRHS = CallerRHS;
ExprResult &RHS = ConvertRHS ? CallerRHS : LocalRHS;
if (const auto *LHSPtrType = LHSType->getAs<PointerType>()) {
if (const auto *RHSPtrType = RHS.get()->getType()->getAs<PointerType>()) {
if (RHSPtrType->getPointeeType()->hasAttr(attr::NoDeref) &&
!LHSPtrType->getPointeeType()->hasAttr(attr::NoDeref)) {
Diag(RHS.get()->getExprLoc(),
diag::warn_noderef_to_dereferenceable_pointer)
<< RHS.get()->getSourceRange();
}
}
}
if (getLangOpts().CPlusPlus) {
if (!LHSType->isRecordType() && !LHSType->isAtomicType()) {
// C++ 5.17p3: If the left operand is not of class type, the
// expression is implicitly converted (C++ 4) to the
// cv-unqualified type of the left operand.
QualType RHSType = RHS.get()->getType();
if (Diagnose) {
RHS = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(),
AA_Assigning);
} else {
ImplicitConversionSequence ICS =
TryImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(),
/*SuppressUserConversions=*/false,
AllowedExplicit::None,
/*InOverloadResolution=*/false,
/*CStyle=*/false,
/*AllowObjCWritebackConversion=*/false);
if (ICS.isFailure())
return Incompatible;
RHS = PerformImplicitConversion(RHS.get(), LHSType.getUnqualifiedType(),
ICS, AA_Assigning);
}
if (RHS.isInvalid())
return Incompatible;
Sema::AssignConvertType result = Compatible;
if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() &&
!ObjC().CheckObjCARCUnavailableWeakConversion(LHSType, RHSType))
result = IncompatibleObjCWeakRef;
return result;
}
// FIXME: Currently, we fall through and treat C++ classes like C
// structures.
// FIXME: We also fall through for atomics; not sure what should
// happen there, though.
} else if (RHS.get()->getType() == Context.OverloadTy) {
// As a set of extensions to C, we support overloading on functions. These
// functions need to be resolved here.
DeclAccessPair DAP;
if (FunctionDecl *FD = ResolveAddressOfOverloadedFunction(
RHS.get(), LHSType, /*Complain=*/false, DAP))
RHS = FixOverloadedFunctionReference(RHS.get(), DAP, FD);
else
return Incompatible;
}
  // This check seems unnatural; however, it is necessary to ensure the proper
// conversion of functions/arrays. If the conversion were done for all
// DeclExpr's (created by ActOnIdExpression), it would mess up the unary
// expressions that suppress this implicit conversion (&, sizeof). This needs
// to happen before we check for null pointer conversions because C does not
// undergo the same implicit conversions as C++ does above (by the calls to
// TryImplicitConversion() and PerformImplicitConversion()) which insert the
// lvalue to rvalue cast before checking for null pointer constraints. This
// addresses code like: nullptr_t val; int *ptr; ptr = val;
//
// Suppress this for references: C++ 8.5.3p5.
if (!LHSType->isReferenceType()) {
// FIXME: We potentially allocate here even if ConvertRHS is false.
RHS = DefaultFunctionArrayLvalueConversion(RHS.get(), Diagnose);
if (RHS.isInvalid())
return Incompatible;
}
// The constraints are expressed in terms of the atomic, qualified, or
// unqualified type of the LHS.
QualType LHSTypeAfterConversion = LHSType.getAtomicUnqualifiedType();
// C99 6.5.16.1p1: the left operand is a pointer and the right is
// a null pointer constant <C23>or its type is nullptr_t;</C23>.
if ((LHSTypeAfterConversion->isPointerType() ||
LHSTypeAfterConversion->isObjCObjectPointerType() ||
LHSTypeAfterConversion->isBlockPointerType()) &&
((getLangOpts().C23 && RHS.get()->getType()->isNullPtrType()) ||
RHS.get()->isNullPointerConstant(Context,
Expr::NPC_ValueDependentIsNull))) {
if (Diagnose || ConvertRHS) {
CastKind Kind;
CXXCastPath Path;
CheckPointerConversion(RHS.get(), LHSType, Kind, Path,
/*IgnoreBaseAccess=*/false, Diagnose);
if (ConvertRHS)
RHS = ImpCastExprToType(RHS.get(), LHSType, Kind, VK_PRValue, &Path);
}
return Compatible;
}
// C23 6.5.16.1p1: the left operand has type atomic, qualified, or
// unqualified bool, and the right operand is a pointer or its type is
// nullptr_t.
if (getLangOpts().C23 && LHSType->isBooleanType() &&
RHS.get()->getType()->isNullPtrType()) {
    // NB: T* -> _Bool is handled in CheckAssignmentConstraints; this only
    // handles nullptr -> _Bool, which needs an extra conversion step.
// We model this by converting from nullptr -> void * and then let the
// conversion from void * -> _Bool happen naturally.
if (Diagnose || ConvertRHS) {
CastKind Kind;
CXXCastPath Path;
CheckPointerConversion(RHS.get(), Context.VoidPtrTy, Kind, Path,
/*IgnoreBaseAccess=*/false, Diagnose);
if (ConvertRHS)
RHS = ImpCastExprToType(RHS.get(), Context.VoidPtrTy, Kind, VK_PRValue,
&Path);
}
}
// OpenCL queue_t type assignment.
if (LHSType->isQueueT() && RHS.get()->isNullPointerConstant(
Context, Expr::NPC_ValueDependentIsNull)) {
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer);
return Compatible;
}
CastKind Kind;
Sema::AssignConvertType result =
CheckAssignmentConstraints(LHSType, RHS, Kind, ConvertRHS);
// C99 6.5.16.1p2: The value of the right operand is converted to the
// type of the assignment expression.
// CheckAssignmentConstraints allows the left-hand side to be a reference,
// so that we can use references in built-in functions even in C.
// The getNonReferenceType() call makes sure that the resulting expression
// does not have reference type.
if (result != Incompatible && RHS.get()->getType() != LHSType) {
QualType Ty = LHSType.getNonLValueExprType(Context);
Expr *E = RHS.get();
// Check for various Objective-C errors. If we are not reporting
// diagnostics and just checking for errors, e.g., during overload
// resolution, return Incompatible to indicate the failure.
if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() &&
ObjC().CheckObjCConversion(SourceRange(), Ty, E,
CheckedConversionKind::Implicit, Diagnose,
DiagnoseCFAudited) != SemaObjC::ACR_okay) {
if (!Diagnose)
return Incompatible;
}
if (getLangOpts().ObjC &&
(ObjC().CheckObjCBridgeRelatedConversions(E->getBeginLoc(), LHSType,
E->getType(), E, Diagnose) ||
ObjC().CheckConversionToObjCLiteral(LHSType, E, Diagnose))) {
if (!Diagnose)
return Incompatible;
// Replace the expression with a corrected version and continue so we
// can find further errors.
RHS = E;
return Compatible;
}
if (ConvertRHS)
RHS = ImpCastExprToType(E, Ty, Kind);
}
return result;
}
namespace {
/// The original operand to an operator, prior to the application of the usual
/// arithmetic conversions and converting the arguments of a builtin operator
/// candidate.
struct OriginalOperand {
explicit OriginalOperand(Expr *Op) : Orig(Op), Conversion(nullptr) {
if (auto *MTE = dyn_cast<MaterializeTemporaryExpr>(Op))
Op = MTE->getSubExpr();
if (auto *BTE = dyn_cast<CXXBindTemporaryExpr>(Op))
Op = BTE->getSubExpr();
if (auto *ICE = dyn_cast<ImplicitCastExpr>(Op)) {
Orig = ICE->getSubExprAsWritten();
Conversion = ICE->getConversionFunction();
}
}
QualType getType() const { return Orig->getType(); }
Expr *Orig;
NamedDecl *Conversion;
};
}
QualType Sema::InvalidOperands(SourceLocation Loc, ExprResult &LHS,
ExprResult &RHS) {
OriginalOperand OrigLHS(LHS.get()), OrigRHS(RHS.get());
Diag(Loc, diag::err_typecheck_invalid_operands)
<< OrigLHS.getType() << OrigRHS.getType()
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
// If a user-defined conversion was applied to either of the operands prior
// to applying the built-in operator rules, tell the user about it.
if (OrigLHS.Conversion) {
Diag(OrigLHS.Conversion->getLocation(),
diag::note_typecheck_invalid_operands_converted)
<< 0 << LHS.get()->getType();
}
if (OrigRHS.Conversion) {
Diag(OrigRHS.Conversion->getLocation(),
diag::note_typecheck_invalid_operands_converted)
<< 1 << RHS.get()->getType();
}
return QualType();
}
QualType Sema::InvalidLogicalVectorOperands(SourceLocation Loc, ExprResult &LHS,
ExprResult &RHS) {
QualType LHSType = LHS.get()->IgnoreImpCasts()->getType();
QualType RHSType = RHS.get()->IgnoreImpCasts()->getType();
bool LHSNatVec = LHSType->isVectorType();
bool RHSNatVec = RHSType->isVectorType();
if (!(LHSNatVec && RHSNatVec)) {
Expr *Vector = LHSNatVec ? LHS.get() : RHS.get();
Expr *NonVector = !LHSNatVec ? LHS.get() : RHS.get();
Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict)
<< 0 << Vector->getType() << NonVector->IgnoreImpCasts()->getType()
<< Vector->getSourceRange();
return QualType();
}
Diag(Loc, diag::err_typecheck_logical_vector_expr_gnu_cpp_restrict)
<< 1 << LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
/// Try to convert a value of non-vector type to a vector type by converting
/// the type to the element type of the vector and then performing a splat.
/// If the language is OpenCL, we only use conversions that promote scalar
/// rank; for C, Obj-C, and C++ we allow any real scalar conversion except
/// for float->int.
///
/// OpenCL V2.0 6.2.6.p2:
/// An error shall occur if any scalar operand type has greater rank
/// than the type of the vector element.
///
/// \param scalar - if non-null, actually perform the conversions
/// \return true if the operation fails (but without diagnosing the failure)
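/// For example (illustrative, OpenCL): given "float4 v; double d;", "v + d"
/// is rejected because 'double' outranks the 'float' element type, whereas
/// "v + 1" converts the int literal to 'float' and splats it across v.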
static bool tryVectorConvertAndSplat(Sema &S, ExprResult *scalar,
QualType scalarTy,
QualType vectorEltTy,
QualType vectorTy,
unsigned &DiagID) {
// The conversion to apply to the scalar before splatting it,
// if necessary.
CastKind scalarCast = CK_NoOp;
if (vectorEltTy->isIntegralType(S.Context)) {
if (S.getLangOpts().OpenCL && (scalarTy->isRealFloatingType() ||
(scalarTy->isIntegerType() &&
S.Context.getIntegerTypeOrder(vectorEltTy, scalarTy) < 0))) {
DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type;
return true;
}
if (!scalarTy->isIntegralType(S.Context))
return true;
scalarCast = CK_IntegralCast;
} else if (vectorEltTy->isRealFloatingType()) {
if (scalarTy->isRealFloatingType()) {
if (S.getLangOpts().OpenCL &&
S.Context.getFloatingTypeOrder(vectorEltTy, scalarTy) < 0) {
DiagID = diag::err_opencl_scalar_type_rank_greater_than_vector_type;
return true;
}
scalarCast = CK_FloatingCast;
}
else if (scalarTy->isIntegralType(S.Context))
scalarCast = CK_IntegralToFloating;
else
return true;
} else {
return true;
}
// Adjust scalar if desired.
if (scalar) {
if (scalarCast != CK_NoOp)
*scalar = S.ImpCastExprToType(scalar->get(), vectorEltTy, scalarCast);
*scalar = S.ImpCastExprToType(scalar->get(), vectorTy, CK_VectorSplat);
}
return false;
}
/// Convert vector E to a vector with the same number of elements but different
/// element type.
static ExprResult convertVector(Expr *E, QualType ElementType, Sema &S) {
const auto *VecTy = E->getType()->getAs<VectorType>();
assert(VecTy && "Expression E must be a vector");
QualType NewVecTy =
VecTy->isExtVectorType()
? S.Context.getExtVectorType(ElementType, VecTy->getNumElements())
: S.Context.getVectorType(ElementType, VecTy->getNumElements(),
VecTy->getVectorKind());
// Look through the implicit cast. Return the subexpression if its type is
// NewVecTy.
if (auto *ICE = dyn_cast<ImplicitCastExpr>(E))
if (ICE->getSubExpr()->getType() == NewVecTy)
return ICE->getSubExpr();
auto Cast = ElementType->isIntegerType() ? CK_IntegralCast : CK_FloatingCast;
return S.ImpCastExprToType(E, NewVecTy, Cast);
}
/// Test if a (constant) integer Int can be cast to another integer type
/// IntTy without losing precision.
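/// For example (illustrative): the 'long' constant 300 needs nine value bits,
/// so it cannot be demoted to a 'char' element type without truncation,
/// whereas the constant 100 fits and is accepted.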
static bool canConvertIntToOtherIntTy(Sema &S, ExprResult *Int,
QualType OtherIntTy) {
QualType IntTy = Int->get()->getType().getUnqualifiedType();
// Reject cases where the value of the Int is unknown as that would
// possibly cause truncation, but accept cases where the scalar can be
// demoted without loss of precision.
Expr::EvalResult EVResult;
bool CstInt = Int->get()->EvaluateAsInt(EVResult, S.Context);
int Order = S.Context.getIntegerTypeOrder(OtherIntTy, IntTy);
bool IntSigned = IntTy->hasSignedIntegerRepresentation();
bool OtherIntSigned = OtherIntTy->hasSignedIntegerRepresentation();
if (CstInt) {
// If the scalar is constant and is of a higher order and has more active
    // bits than the vector element type, reject it.
llvm::APSInt Result = EVResult.Val.getInt();
unsigned NumBits = IntSigned
? (Result.isNegative() ? Result.getSignificantBits()
: Result.getActiveBits())
: Result.getActiveBits();
if (Order < 0 && S.Context.getIntWidth(OtherIntTy) < NumBits)
return true;
// If the signedness of the scalar type and the vector element type
// differs and the number of bits is greater than that of the vector
// element reject it.
return (IntSigned != OtherIntSigned &&
NumBits > S.Context.getIntWidth(OtherIntTy));
}
  // Reject cases where the value of the scalar is not constant and its
  // order is greater than that of the vector element type.
return (Order < 0);
}
/// Test if a (constant) integer Int can be cast to floating point type
/// FloatTy without losing precision.
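/// For example (illustrative): against a 'double' (53-bit significand), the
/// 64-bit constant (1LL << 53) converts exactly and is accepted, while
/// (1LL << 53) + 1 would be rounded and is therefore rejected.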
static bool canConvertIntTyToFloatTy(Sema &S, ExprResult *Int,
QualType FloatTy) {
QualType IntTy = Int->get()->getType().getUnqualifiedType();
// Determine if the integer constant can be expressed as a floating point
// number of the appropriate type.
Expr::EvalResult EVResult;
bool CstInt = Int->get()->EvaluateAsInt(EVResult, S.Context);
uint64_t Bits = 0;
if (CstInt) {
// Reject constants that would be truncated if they were converted to
// the floating point type. Test by simple to/from conversion.
// FIXME: Ideally the conversion to an APFloat and from an APFloat
// could be avoided if there was a convertFromAPInt method
// which could signal back if implicit truncation occurred.
llvm::APSInt Result = EVResult.Val.getInt();
llvm::APFloat Float(S.Context.getFloatTypeSemantics(FloatTy));
Float.convertFromAPInt(Result, IntTy->hasSignedIntegerRepresentation(),
llvm::APFloat::rmTowardZero);
llvm::APSInt ConvertBack(S.Context.getIntWidth(IntTy),
!IntTy->hasSignedIntegerRepresentation());
bool Ignored = false;
Float.convertToInteger(ConvertBack, llvm::APFloat::rmNearestTiesToEven,
&Ignored);
if (Result != ConvertBack)
return true;
} else {
// Reject types that cannot be fully encoded into the mantissa of
// the float.
Bits = S.Context.getTypeSize(IntTy);
unsigned FloatPrec = llvm::APFloat::semanticsPrecision(
S.Context.getFloatTypeSemantics(FloatTy));
if (Bits > FloatPrec)
return true;
}
return false;
}
/// Attempt to convert and splat Scalar into a vector whose type matches
/// Vector, following GCC conversion rules. The rule is that an implicit
/// conversion can occur when Scalar can be cast to match Vector's element
/// type without causing truncation of Scalar.
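/// For example (illustrative): adding the 'long long' constant 42 to a GCC
/// "vector of int" succeeds, since 42 is demoted to 'int' without truncation
/// and then splatted, while a non-constant 'long long' scalar is rejected
/// because the demotion could truncate.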
static bool tryGCCVectorConvertAndSplat(Sema &S, ExprResult *Scalar,
ExprResult *Vector) {
QualType ScalarTy = Scalar->get()->getType().getUnqualifiedType();
QualType VectorTy = Vector->get()->getType().getUnqualifiedType();
QualType VectorEltTy;
if (const auto *VT = VectorTy->getAs<VectorType>()) {
assert(!isa<ExtVectorType>(VT) &&
"ExtVectorTypes should not be handled here!");
VectorEltTy = VT->getElementType();
} else if (VectorTy->isSveVLSBuiltinType()) {
VectorEltTy =
VectorTy->castAs<BuiltinType>()->getSveEltType(S.getASTContext());
} else {
llvm_unreachable("Only Fixed-Length and SVE Vector types are handled here");
}
// Reject cases where the vector element type or the scalar element type are
// not integral or floating point types.
if (!VectorEltTy->isArithmeticType() || !ScalarTy->isArithmeticType())
return true;
// The conversion to apply to the scalar before splatting it,
// if necessary.
CastKind ScalarCast = CK_NoOp;
// Accept cases where the vector elements are integers and the scalar is
// an integer.
// FIXME: Notionally if the scalar was a floating point value with a precise
// integral representation, we could cast it to an appropriate integer
// type and then perform the rest of the checks here. GCC will perform
// this conversion in some cases as determined by the input language.
// We should accept it on a language independent basis.
if (VectorEltTy->isIntegralType(S.Context) &&
ScalarTy->isIntegralType(S.Context) &&
S.Context.getIntegerTypeOrder(VectorEltTy, ScalarTy)) {
if (canConvertIntToOtherIntTy(S, Scalar, VectorEltTy))
return true;
ScalarCast = CK_IntegralCast;
} else if (VectorEltTy->isIntegralType(S.Context) &&
ScalarTy->isRealFloatingType()) {
if (S.Context.getTypeSize(VectorEltTy) == S.Context.getTypeSize(ScalarTy))
ScalarCast = CK_FloatingToIntegral;
else
return true;
} else if (VectorEltTy->isRealFloatingType()) {
if (ScalarTy->isRealFloatingType()) {
      // Reject cases where the scalar is not a constant and its type has a
      // higher order than the vector element type.
llvm::APFloat Result(0.0);
// Determine whether this is a constant scalar. In the event that the
// value is dependent (and thus cannot be evaluated by the constant
// evaluator), skip the evaluation. This will then diagnose once the
// expression is instantiated.
bool CstScalar = Scalar->get()->isValueDependent() ||
Scalar->get()->EvaluateAsFloat(Result, S.Context);
int Order = S.Context.getFloatingTypeOrder(VectorEltTy, ScalarTy);
if (!CstScalar && Order < 0)
return true;
// If the scalar cannot be safely casted to the vector element type,
// reject it.
if (CstScalar) {
bool Truncated = false;
Result.convert(S.Context.getFloatTypeSemantics(VectorEltTy),
llvm::APFloat::rmNearestTiesToEven, &Truncated);
if (Truncated)
return true;
}
ScalarCast = CK_FloatingCast;
} else if (ScalarTy->isIntegralType(S.Context)) {
if (canConvertIntTyToFloatTy(S, Scalar, VectorEltTy))
return true;
ScalarCast = CK_IntegralToFloating;
} else
return true;
} else if (ScalarTy->isEnumeralType())
return true;
// Adjust scalar if desired.
if (ScalarCast != CK_NoOp)
*Scalar = S.ImpCastExprToType(Scalar->get(), VectorEltTy, ScalarCast);
*Scalar = S.ImpCastExprToType(Scalar->get(), VectorTy, CK_VectorSplat);
return false;
}
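// For illustration, a sketch of what CheckVectorOperands accepts (the
// typedefs are hypothetical examples):
//
//   typedef int v4si __attribute__((vector_size(16)));
//   typedef short v8hi __attribute__((vector_size(16)));
//   v4si a; v8hi b;
//   a + a; // identical types: no conversion needed
//   a + 1; // scalar converts to 'int' and splats to 'v4si'
//   a + b; // same total size: accepted only as a lax vector conversion
//          // (a bitcast), and deprecated for mixed AltiVec types on PPC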
QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc, bool IsCompAssign,
bool AllowBothBool,
bool AllowBoolConversions,
bool AllowBoolOperation,
bool ReportInvalid) {
if (!IsCompAssign) {
LHS = DefaultFunctionArrayLvalueConversion(LHS.get());
if (LHS.isInvalid())
return QualType();
}
RHS = DefaultFunctionArrayLvalueConversion(RHS.get());
if (RHS.isInvalid())
return QualType();
// For conversion purposes, we ignore any qualifiers.
// For example, "const float" and "float" are equivalent.
QualType LHSType = LHS.get()->getType().getUnqualifiedType();
QualType RHSType = RHS.get()->getType().getUnqualifiedType();
const VectorType *LHSVecType = LHSType->getAs<VectorType>();
const VectorType *RHSVecType = RHSType->getAs<VectorType>();
assert(LHSVecType || RHSVecType);
// AltiVec-style "vector bool op vector bool" combinations are allowed
// for some operators but not others.
if (!AllowBothBool && LHSVecType &&
LHSVecType->getVectorKind() == VectorKind::AltiVecBool && RHSVecType &&
RHSVecType->getVectorKind() == VectorKind::AltiVecBool)
return ReportInvalid ? InvalidOperands(Loc, LHS, RHS) : QualType();
// This operation may not be performed on boolean vectors.
if (!AllowBoolOperation &&
(LHSType->isExtVectorBoolType() || RHSType->isExtVectorBoolType()))
return ReportInvalid ? InvalidOperands(Loc, LHS, RHS) : QualType();
// If the vector types are identical, return.
if (Context.hasSameType(LHSType, RHSType))
return Context.getCommonSugaredType(LHSType, RHSType);
// If we have compatible AltiVec and GCC vector types, use the AltiVec type.
if (LHSVecType && RHSVecType &&
Context.areCompatibleVectorTypes(LHSType, RHSType)) {
if (isa<ExtVectorType>(LHSVecType)) {
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast);
return LHSType;
}
if (!IsCompAssign)
LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast);
return RHSType;
}
// AllowBoolConversions says that bool and non-bool AltiVec vectors
// can be mixed, with the result being the non-bool type. The non-bool
// operand must have integer element type.
if (AllowBoolConversions && LHSVecType && RHSVecType &&
LHSVecType->getNumElements() == RHSVecType->getNumElements() &&
(Context.getTypeSize(LHSVecType->getElementType()) ==
Context.getTypeSize(RHSVecType->getElementType()))) {
if (LHSVecType->getVectorKind() == VectorKind::AltiVecVector &&
LHSVecType->getElementType()->isIntegerType() &&
RHSVecType->getVectorKind() == VectorKind::AltiVecBool) {
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast);
return LHSType;
}
if (!IsCompAssign &&
LHSVecType->getVectorKind() == VectorKind::AltiVecBool &&
RHSVecType->getVectorKind() == VectorKind::AltiVecVector &&
RHSVecType->getElementType()->isIntegerType()) {
LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast);
return RHSType;
}
}
// Expressions containing fixed-length and sizeless SVE/RVV vectors are
// invalid since the ambiguity can affect the ABI.
auto IsSveRVVConversion = [](QualType FirstType, QualType SecondType,
unsigned &SVEorRVV) {
const VectorType *VecType = SecondType->getAs<VectorType>();
SVEorRVV = 0;
if (FirstType->isSizelessBuiltinType() && VecType) {
if (VecType->getVectorKind() == VectorKind::SveFixedLengthData ||
VecType->getVectorKind() == VectorKind::SveFixedLengthPredicate)
return true;
if (VecType->getVectorKind() == VectorKind::RVVFixedLengthData ||
VecType->getVectorKind() == VectorKind::RVVFixedLengthMask) {
SVEorRVV = 1;
return true;
}
}
return false;
};
unsigned SVEorRVV;
if (IsSveRVVConversion(LHSType, RHSType, SVEorRVV) ||
IsSveRVVConversion(RHSType, LHSType, SVEorRVV)) {
Diag(Loc, diag::err_typecheck_sve_rvv_ambiguous)
<< SVEorRVV << LHSType << RHSType;
return QualType();
}
// Expressions containing GNU and SVE or RVV (fixed or sizeless) vectors are
// invalid since the ambiguity can affect the ABI.
auto IsSveRVVGnuConversion = [](QualType FirstType, QualType SecondType,
unsigned &SVEorRVV) {
const VectorType *FirstVecType = FirstType->getAs<VectorType>();
const VectorType *SecondVecType = SecondType->getAs<VectorType>();
SVEorRVV = 0;
if (FirstVecType && SecondVecType) {
if (FirstVecType->getVectorKind() == VectorKind::Generic) {
if (SecondVecType->getVectorKind() == VectorKind::SveFixedLengthData ||
SecondVecType->getVectorKind() ==
VectorKind::SveFixedLengthPredicate)
return true;
if (SecondVecType->getVectorKind() == VectorKind::RVVFixedLengthData ||
SecondVecType->getVectorKind() == VectorKind::RVVFixedLengthMask) {
SVEorRVV = 1;
return true;
}
}
return false;
}
if (SecondVecType &&
SecondVecType->getVectorKind() == VectorKind::Generic) {
if (FirstType->isSVESizelessBuiltinType())
return true;
if (FirstType->isRVVSizelessBuiltinType()) {
SVEorRVV = 1;
return true;
}
}
return false;
};
if (IsSveRVVGnuConversion(LHSType, RHSType, SVEorRVV) ||
IsSveRVVGnuConversion(RHSType, LHSType, SVEorRVV)) {
Diag(Loc, diag::err_typecheck_sve_rvv_gnu_ambiguous)
<< SVEorRVV << LHSType << RHSType;
return QualType();
}
// If there's a vector type and a scalar, try to convert the scalar to
// the vector element type and splat.
unsigned DiagID = diag::err_typecheck_vector_not_convertable;
if (!RHSVecType) {
if (isa<ExtVectorType>(LHSVecType)) {
if (!tryVectorConvertAndSplat(*this, &RHS, RHSType,
LHSVecType->getElementType(), LHSType,
DiagID))
return LHSType;
} else {
if (!tryGCCVectorConvertAndSplat(*this, &RHS, &LHS))
return LHSType;
}
}
if (!LHSVecType) {
if (isa<ExtVectorType>(RHSVecType)) {
if (!tryVectorConvertAndSplat(*this, (IsCompAssign ? nullptr : &LHS),
LHSType, RHSVecType->getElementType(),
RHSType, DiagID))
return RHSType;
} else {
if (LHS.get()->isLValue() ||
!tryGCCVectorConvertAndSplat(*this, &LHS, &RHS))
return RHSType;
}
}
// FIXME: The code below also handles conversion between vectors and
// non-scalars, we should break this down into fine grained specific checks
// and emit proper diagnostics.
QualType VecType = LHSVecType ? LHSType : RHSType;
const VectorType *VT = LHSVecType ? LHSVecType : RHSVecType;
QualType OtherType = LHSVecType ? RHSType : LHSType;
ExprResult *OtherExpr = LHSVecType ? &RHS : &LHS;
if (isLaxVectorConversion(OtherType, VecType)) {
if (Context.getTargetInfo().getTriple().isPPC() &&
anyAltivecTypes(RHSType, LHSType) &&
!Context.areCompatibleVectorTypes(RHSType, LHSType))
Diag(Loc, diag::warn_deprecated_lax_vec_conv_all) << RHSType << LHSType;
// If we're allowing lax vector conversions, only the total (data) size
// needs to be the same. For non-compound assignment, if one of the types is
// scalar, the result is always the vector type.
if (!IsCompAssign) {
*OtherExpr = ImpCastExprToType(OtherExpr->get(), VecType, CK_BitCast);
return VecType;
// In a compound assignment, lhs += rhs, 'lhs' is an lvalue source,
// forbidding any implicit cast. Here, the 'rhs' should be implicitly cast
// to the 'lhs' type. Note that this is already done by non-compound
// assignments in CheckAssignmentConstraints. If it's a scalar type, only
// bitcast for <1 x T> -> T. The result is also a vector type.
} else if (OtherType->isExtVectorType() || OtherType->isVectorType() ||
(OtherType->isScalarType() && VT->getNumElements() == 1)) {
ExprResult *RHSExpr = &RHS;
*RHSExpr = ImpCastExprToType(RHSExpr->get(), LHSType, CK_BitCast);
return VecType;
}
}
// Okay, the expression is invalid.
// If there's a non-vector, non-real operand, diagnose that.
if ((!RHSVecType && !RHSType->isRealType()) ||
(!LHSVecType && !LHSType->isRealType())) {
Diag(Loc, diag::err_typecheck_vector_not_convertable_non_scalar)
<< LHSType << RHSType
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return QualType();
}
// OpenCL V1.1 6.2.6.p1:
// If the operands are of more than one vector type, then an error shall
// occur. Implicit conversions between vector types are not permitted, per
// section 6.2.1.
if (getLangOpts().OpenCL &&
RHSVecType && isa<ExtVectorType>(RHSVecType) &&
LHSVecType && isa<ExtVectorType>(LHSVecType)) {
Diag(Loc, diag::err_opencl_implicit_vector_conversion) << LHSType
<< RHSType;
return QualType();
}
// If there is a vector type that is not an ExtVector and a scalar, we reach
// this point if the scalar could not be converted to the vector's element
// type without truncation.
if ((RHSVecType && !isa<ExtVectorType>(RHSVecType)) ||
(LHSVecType && !isa<ExtVectorType>(LHSVecType))) {
QualType Scalar = LHSVecType ? RHSType : LHSType;
QualType Vector = LHSVecType ? LHSType : RHSType;
unsigned ScalarOrVector = LHSVecType && RHSVecType ? 1 : 0;
Diag(Loc,
diag::err_typecheck_vector_not_convertable_implict_truncation)
<< ScalarOrVector << Scalar << Vector;
return QualType();
}
// Otherwise, use the generic diagnostic.
Diag(Loc, DiagID)
<< LHSType << RHSType
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return QualType();
}
QualType Sema::CheckSizelessVectorOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc,
bool IsCompAssign,
ArithConvKind OperationKind) {
if (!IsCompAssign) {
LHS = DefaultFunctionArrayLvalueConversion(LHS.get());
if (LHS.isInvalid())
return QualType();
}
RHS = DefaultFunctionArrayLvalueConversion(RHS.get());
if (RHS.isInvalid())
return QualType();
QualType LHSType = LHS.get()->getType().getUnqualifiedType();
QualType RHSType = RHS.get()->getType().getUnqualifiedType();
const BuiltinType *LHSBuiltinTy = LHSType->getAs<BuiltinType>();
const BuiltinType *RHSBuiltinTy = RHSType->getAs<BuiltinType>();
unsigned DiagID = diag::err_typecheck_invalid_operands;
if ((OperationKind == ACK_Arithmetic) &&
((LHSBuiltinTy && LHSBuiltinTy->isSVEBool()) ||
(RHSBuiltinTy && RHSBuiltinTy->isSVEBool()))) {
Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
if (Context.hasSameType(LHSType, RHSType))
return LHSType;
if (LHSType->isSveVLSBuiltinType() && !RHSType->isSveVLSBuiltinType()) {
if (!tryGCCVectorConvertAndSplat(*this, &RHS, &LHS))
return LHSType;
}
if (RHSType->isSveVLSBuiltinType() && !LHSType->isSveVLSBuiltinType()) {
if (LHS.get()->isLValue() ||
!tryGCCVectorConvertAndSplat(*this, &LHS, &RHS))
return RHSType;
}
if ((!LHSType->isSveVLSBuiltinType() && !LHSType->isRealType()) ||
(!RHSType->isSveVLSBuiltinType() && !RHSType->isRealType())) {
Diag(Loc, diag::err_typecheck_vector_not_convertable_non_scalar)
<< LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
if (LHSType->isSveVLSBuiltinType() && RHSType->isSveVLSBuiltinType() &&
Context.getBuiltinVectorTypeInfo(LHSBuiltinTy).EC !=
Context.getBuiltinVectorTypeInfo(RHSBuiltinTy).EC) {
Diag(Loc, diag::err_typecheck_vector_lengths_not_equal)
<< LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
if (LHSType->isSveVLSBuiltinType() || RHSType->isSveVLSBuiltinType()) {
QualType Scalar = LHSType->isSveVLSBuiltinType() ? RHSType : LHSType;
QualType Vector = LHSType->isSveVLSBuiltinType() ? LHSType : RHSType;
bool ScalarOrVector =
LHSType->isSveVLSBuiltinType() && RHSType->isSveVLSBuiltinType();
Diag(Loc, diag::err_typecheck_vector_not_convertable_implict_truncation)
<< ScalarOrVector << Scalar << Vector;
return QualType();
}
Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
// checkArithmeticNull - Detect when a NULL constant is used improperly in an
// expression. These are mainly cases where the null pointer is used as an
// integer instead of a pointer.
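//
// For illustration (C++ with GNU's __null-based NULL):
//
//   int n = NULL + 1;   // warn: NULL used in an arithmetic operation
//   bool b = NULL < 42; // warn: NULL compared against a non-pointer
//   p == NULL;          // fine: comparing a pointer against NULL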
static void checkArithmeticNull(Sema &S, ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc, bool IsCompare) {
// The canonical way to check for a GNU null is with isNullPointerConstant,
// but we use a bit of a hack here for speed; this is a relatively
// hot path, and isNullPointerConstant is slow.
bool LHSNull = isa<GNUNullExpr>(LHS.get()->IgnoreParenImpCasts());
bool RHSNull = isa<GNUNullExpr>(RHS.get()->IgnoreParenImpCasts());
QualType NonNullType = LHSNull ? RHS.get()->getType() : LHS.get()->getType();
// Avoid analyzing cases where the result will either be invalid (and
// diagnosed as such) or entirely valid and not something to warn about.
if ((!LHSNull && !RHSNull) || NonNullType->isBlockPointerType() ||
NonNullType->isMemberPointerType() || NonNullType->isFunctionType())
return;
// Arithmetic operations with a null pointer do not make sense no matter
// what the other expression is.
if (!IsCompare) {
S.Diag(Loc, diag::warn_null_in_arithmetic_operation)
<< (LHSNull ? LHS.get()->getSourceRange() : SourceRange())
<< (RHSNull ? RHS.get()->getSourceRange() : SourceRange());
return;
}
// The rest of the operations only make sense with a null pointer
// if the other expression is a pointer.
if (LHSNull == RHSNull || NonNullType->isAnyPointerType() ||
NonNullType->canDecayToPointerType())
return;
S.Diag(Loc, diag::warn_null_in_comparison_operation)
<< LHSNull /* LHS is NULL */ << NonNullType
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
}
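// For illustration, the sizeof-division patterns diagnosed below
// (hypothetical declarations):
//
//   int *p; int arr[10];
//   sizeof(p) / sizeof(p[0]);    // warn: 'sizeof (p)' is the size of the
//                                // pointer, not of an array
//   sizeof(arr) / sizeof(short); // warn: the element type 'int' does not
//                                // match 'short'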
static void DiagnoseDivisionSizeofPointerOrArray(Sema &S, Expr *LHS, Expr *RHS,
SourceLocation Loc) {
const auto *LUE = dyn_cast<UnaryExprOrTypeTraitExpr>(LHS);
const auto *RUE = dyn_cast<UnaryExprOrTypeTraitExpr>(RHS);
if (!LUE || !RUE)
return;
if (LUE->getKind() != UETT_SizeOf || LUE->isArgumentType() ||
RUE->getKind() != UETT_SizeOf)
return;
const Expr *LHSArg = LUE->getArgumentExpr()->IgnoreParens();
QualType LHSTy = LHSArg->getType();
QualType RHSTy;
if (RUE->isArgumentType())
RHSTy = RUE->getArgumentType().getNonReferenceType();
else
RHSTy = RUE->getArgumentExpr()->IgnoreParens()->getType();
if (LHSTy->isPointerType() && !RHSTy->isPointerType()) {
if (!S.Context.hasSameUnqualifiedType(LHSTy->getPointeeType(), RHSTy))
return;
S.Diag(Loc, diag::warn_division_sizeof_ptr) << LHS << LHS->getSourceRange();
if (const auto *DRE = dyn_cast<DeclRefExpr>(LHSArg)) {
if (const ValueDecl *LHSArgDecl = DRE->getDecl())
S.Diag(LHSArgDecl->getLocation(), diag::note_pointer_declared_here)
<< LHSArgDecl;
}
} else if (const auto *ArrayTy = S.Context.getAsArrayType(LHSTy)) {
QualType ArrayElemTy = ArrayTy->getElementType();
if (ArrayElemTy != S.Context.getBaseElementType(ArrayTy) ||
ArrayElemTy->isDependentType() || RHSTy->isDependentType() ||
RHSTy->isReferenceType() || ArrayElemTy->isCharType() ||
S.Context.getTypeSize(ArrayElemTy) == S.Context.getTypeSize(RHSTy))
return;
S.Diag(Loc, diag::warn_division_sizeof_array)
<< LHSArg->getSourceRange() << ArrayElemTy << RHSTy;
if (const auto *DRE = dyn_cast<DeclRefExpr>(LHSArg)) {
if (const ValueDecl *LHSArgDecl = DRE->getDecl())
S.Diag(LHSArgDecl->getLocation(), diag::note_array_declared_here)
<< LHSArgDecl;
}
S.Diag(Loc, diag::note_precedence_silence) << RHS;
}
}
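// For illustration, given an integer 'n':
//
//   n / 0; // warn: division by zero is undefined
//   n % 0; // warn: remainder by zero is undefined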
static void DiagnoseBadDivideOrRemainderValues(Sema& S, ExprResult &LHS,
ExprResult &RHS,
SourceLocation Loc, bool IsDiv) {
// Check for division/remainder by zero.
Expr::EvalResult RHSValue;
if (!RHS.get()->isValueDependent() &&
RHS.get()->EvaluateAsInt(RHSValue, S.Context) &&
RHSValue.Val.getInt() == 0)
S.DiagRuntimeBehavior(Loc, RHS.get(),
S.PDiag(diag::warn_remainder_division_by_zero)
<< IsDiv << RHS.get()->getSourceRange());
}
QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc,
bool IsCompAssign, bool IsDiv) {
checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
QualType LHSTy = LHS.get()->getType();
QualType RHSTy = RHS.get()->getType();
if (LHSTy->isVectorType() || RHSTy->isVectorType())
return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
/*AllowBothBool*/ getLangOpts().AltiVec,
/*AllowBoolConversions*/ false,
/*AllowBooleanOperation*/ false,
/*ReportInvalid*/ true);
if (LHSTy->isSveVLSBuiltinType() || RHSTy->isSveVLSBuiltinType())
return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign,
ACK_Arithmetic);
if (!IsDiv &&
(LHSTy->isConstantMatrixType() || RHSTy->isConstantMatrixType()))
return CheckMatrixMultiplyOperands(LHS, RHS, Loc, IsCompAssign);
// For division, only matrix-by-scalar is supported. Other combinations with
// matrix types are invalid.
if (IsDiv && LHSTy->isConstantMatrixType() && RHSTy->isArithmeticType())
return CheckMatrixElementwiseOperands(LHS, RHS, Loc, IsCompAssign);
QualType compType = UsualArithmeticConversions(
LHS, RHS, Loc, IsCompAssign ? ACK_CompAssign : ACK_Arithmetic);
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
if (compType.isNull() || !compType->isArithmeticType())
return InvalidOperands(Loc, LHS, RHS);
if (IsDiv) {
DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, IsDiv);
DiagnoseDivisionSizeofPointerOrArray(*this, LHS.get(), RHS.get(), Loc);
}
return compType;
}
QualType Sema::CheckRemainderOperands(
ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) {
checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
if (LHS.get()->getType()->isVectorType() ||
RHS.get()->getType()->isVectorType()) {
if (LHS.get()->getType()->hasIntegerRepresentation() &&
RHS.get()->getType()->hasIntegerRepresentation())
return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
/*AllowBothBool*/ getLangOpts().AltiVec,
/*AllowBoolConversions*/ false,
/*AllowBooleanOperation*/ false,
/*ReportInvalid*/ true);
return InvalidOperands(Loc, LHS, RHS);
}
if (LHS.get()->getType()->isSveVLSBuiltinType() ||
RHS.get()->getType()->isSveVLSBuiltinType()) {
if (LHS.get()->getType()->hasIntegerRepresentation() &&
RHS.get()->getType()->hasIntegerRepresentation())
return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign,
ACK_Arithmetic);
return InvalidOperands(Loc, LHS, RHS);
}
QualType compType = UsualArithmeticConversions(
LHS, RHS, Loc, IsCompAssign ? ACK_CompAssign : ACK_Arithmetic);
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
if (compType.isNull() || !compType->isIntegerType())
return InvalidOperands(Loc, LHS, RHS);
DiagnoseBadDivideOrRemainderValues(*this, LHS, RHS, Loc, false /* IsDiv */);
return compType;
}
/// Diagnose invalid arithmetic on two void pointers.
static void diagnoseArithmeticOnTwoVoidPointers(Sema &S, SourceLocation Loc,
Expr *LHSExpr, Expr *RHSExpr) {
S.Diag(Loc, S.getLangOpts().CPlusPlus
? diag::err_typecheck_pointer_arith_void_type
: diag::ext_gnu_void_ptr)
<< 1 /* two pointers */ << LHSExpr->getSourceRange()
<< RHSExpr->getSourceRange();
}
/// Diagnose invalid arithmetic on a void pointer.
static void diagnoseArithmeticOnVoidPointer(Sema &S, SourceLocation Loc,
Expr *Pointer) {
S.Diag(Loc, S.getLangOpts().CPlusPlus
? diag::err_typecheck_pointer_arith_void_type
: diag::ext_gnu_void_ptr)
<< 0 /* one pointer */ << Pointer->getSourceRange();
}
/// Diagnose invalid arithmetic on a null pointer.
///
/// If \p IsGNUIdiom is true, the operation is using the 'p = (i8*)nullptr + n'
/// idiom, which we recognize as a GNU extension.
///
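/// For illustration, a sketch of the two diagnostics ('n' is an integer):
/// \code
///   char *p = (char *)0 + n; // recognized GNU idiom: milder warning
///   int *q = (int *)0 + n;   // arithmetic on a null pointer: warning
/// \endcode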
static void diagnoseArithmeticOnNullPointer(Sema &S, SourceLocation Loc,
Expr *Pointer, bool IsGNUIdiom) {
if (IsGNUIdiom)
S.Diag(Loc, diag::warn_gnu_null_ptr_arith)
<< Pointer->getSourceRange();
else
S.Diag(Loc, diag::warn_pointer_arith_null_ptr)
<< S.getLangOpts().CPlusPlus << Pointer->getSourceRange();
}
/// Diagnose invalid subtraction on a null pointer.
///
static void diagnoseSubtractionOnNullPointer(Sema &S, SourceLocation Loc,
Expr *Pointer, bool BothNull) {
// Null - null is valid in C++ [expr.add]p7
if (BothNull && S.getLangOpts().CPlusPlus)
return;
// Is this a macro from a system header?
if (S.Diags.getSuppressSystemWarnings() && S.SourceMgr.isInSystemMacro(Loc))
return;
S.DiagRuntimeBehavior(Loc, Pointer,
S.PDiag(diag::warn_pointer_sub_null_ptr)
<< S.getLangOpts().CPlusPlus
<< Pointer->getSourceRange());
}
/// Diagnose invalid arithmetic on two function pointers.
static void diagnoseArithmeticOnTwoFunctionPointers(Sema &S, SourceLocation Loc,
Expr *LHS, Expr *RHS) {
assert(LHS->getType()->isAnyPointerType());
assert(RHS->getType()->isAnyPointerType());
S.Diag(Loc, S.getLangOpts().CPlusPlus
? diag::err_typecheck_pointer_arith_function_type
: diag::ext_gnu_ptr_func_arith)
<< 1 /* two pointers */ << LHS->getType()->getPointeeType()
// We only show the second type if it differs from the first.
<< (unsigned)!S.Context.hasSameUnqualifiedType(LHS->getType(),
RHS->getType())
<< RHS->getType()->getPointeeType()
<< LHS->getSourceRange() << RHS->getSourceRange();
}
/// Diagnose invalid arithmetic on a function pointer.
static void diagnoseArithmeticOnFunctionPointer(Sema &S, SourceLocation Loc,
Expr *Pointer) {
assert(Pointer->getType()->isAnyPointerType());
S.Diag(Loc, S.getLangOpts().CPlusPlus
? diag::err_typecheck_pointer_arith_function_type
: diag::ext_gnu_ptr_func_arith)
<< 0 /* one pointer */ << Pointer->getType()->getPointeeType()
<< 0 /* one pointer, so only one type */
<< Pointer->getSourceRange();
}
/// Emit an error if Operand is a pointer to an incomplete type.
///
/// \returns True if the pointee type is incomplete.
static bool checkArithmeticIncompletePointerType(Sema &S, SourceLocation Loc,
Expr *Operand) {
QualType ResType = Operand->getType();
if (const AtomicType *ResAtomicType = ResType->getAs<AtomicType>())
ResType = ResAtomicType->getValueType();
assert(ResType->isAnyPointerType());
QualType PointeeTy = ResType->getPointeeType();
return S.RequireCompleteSizedType(
Loc, PointeeTy,
diag::err_typecheck_arithmetic_incomplete_or_sizeless_type,
Operand->getSourceRange());
}
/// Check the validity of an arithmetic pointer operand.
///
/// If the operand has pointer type, this code will check for pointer types
/// which are invalid in arithmetic operations. These will be diagnosed
/// appropriately, including whether or not the use is supported as an
/// extension.
///
/// \returns True when the operand is valid to use (even if as an extension).
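///
/// For illustration, a sketch of what gets diagnosed:
/// \code
///   void *v;      v + 1;  // GNU extension in C; an error in C++
///   void (*fp)(); fp + 1; // likewise for function pointers
///   struct S *s;  s + 1;  // error if 'struct S' is incomplete here
/// \endcode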
static bool checkArithmeticOpPointerOperand(Sema &S, SourceLocation Loc,
Expr *Operand) {
QualType ResType = Operand->getType();
if (const AtomicType *ResAtomicType = ResType->getAs<AtomicType>())
ResType = ResAtomicType->getValueType();
if (!ResType->isAnyPointerType()) return true;
QualType PointeeTy = ResType->getPointeeType();
if (PointeeTy->isVoidType()) {
diagnoseArithmeticOnVoidPointer(S, Loc, Operand);
return !S.getLangOpts().CPlusPlus;
}
if (PointeeTy->isFunctionType()) {
diagnoseArithmeticOnFunctionPointer(S, Loc, Operand);
return !S.getLangOpts().CPlusPlus;
}
if (checkArithmeticIncompletePointerType(S, Loc, Operand)) return false;
return true;
}
/// Check the validity of a binary arithmetic operation w.r.t. pointer
/// operands.
///
/// This routine will diagnose any invalid arithmetic on pointer operands much
/// like \see checkArithmeticOpPointerOperand. However, it has special logic
/// for emitting a single diagnostic even for operations where both LHS and RHS
/// are (potentially problematic) pointers.
///
/// \returns True when the operand is valid to use (even if as an extension).
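///
/// For illustration:
/// \code
///   void *a, *b;
///   a - b; // one combined "two void pointers" diagnostic, not two
/// \endcode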
static bool checkArithmeticBinOpPointerOperands(Sema &S, SourceLocation Loc,
Expr *LHSExpr, Expr *RHSExpr) {
bool isLHSPointer = LHSExpr->getType()->isAnyPointerType();
bool isRHSPointer = RHSExpr->getType()->isAnyPointerType();
if (!isLHSPointer && !isRHSPointer) return true;
QualType LHSPointeeTy, RHSPointeeTy;
if (isLHSPointer) LHSPointeeTy = LHSExpr->getType()->getPointeeType();
if (isRHSPointer) RHSPointeeTy = RHSExpr->getType()->getPointeeType();
// If both are pointers, check whether the operation is valid w.r.t. address
// spaces.
if (isLHSPointer && isRHSPointer) {
if (!LHSPointeeTy.isAddressSpaceOverlapping(RHSPointeeTy)) {
S.Diag(Loc,
diag::err_typecheck_op_on_nonoverlapping_address_space_pointers)
<< LHSExpr->getType() << RHSExpr->getType() << 1 /*arithmetic op*/
<< LHSExpr->getSourceRange() << RHSExpr->getSourceRange();
return false;
}
}
// Check for arithmetic on pointers to incomplete types.
bool isLHSVoidPtr = isLHSPointer && LHSPointeeTy->isVoidType();
bool isRHSVoidPtr = isRHSPointer && RHSPointeeTy->isVoidType();
if (isLHSVoidPtr || isRHSVoidPtr) {
if (!isRHSVoidPtr) diagnoseArithmeticOnVoidPointer(S, Loc, LHSExpr);
else if (!isLHSVoidPtr) diagnoseArithmeticOnVoidPointer(S, Loc, RHSExpr);
else diagnoseArithmeticOnTwoVoidPointers(S, Loc, LHSExpr, RHSExpr);
return !S.getLangOpts().CPlusPlus;
}
bool isLHSFuncPtr = isLHSPointer && LHSPointeeTy->isFunctionType();
bool isRHSFuncPtr = isRHSPointer && RHSPointeeTy->isFunctionType();
if (isLHSFuncPtr || isRHSFuncPtr) {
if (!isRHSFuncPtr) diagnoseArithmeticOnFunctionPointer(S, Loc, LHSExpr);
else if (!isLHSFuncPtr) diagnoseArithmeticOnFunctionPointer(S, Loc,
RHSExpr);
else diagnoseArithmeticOnTwoFunctionPointers(S, Loc, LHSExpr, RHSExpr);
return !S.getLangOpts().CPlusPlus;
}
if (isLHSPointer && checkArithmeticIncompletePointerType(S, Loc, LHSExpr))
return false;
if (isRHSPointer && checkArithmeticIncompletePointerType(S, Loc, RHSExpr))
return false;
return true;
}
/// diagnoseStringPlusInt - Emit a warning when adding an integer to a string
/// literal.
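///
/// For illustration:
/// \code
///   const char *p = "foo" + 2; // warn; the fix-it suggests &"foo"[2]
/// \endcode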
static void diagnoseStringPlusInt(Sema &Self, SourceLocation OpLoc,
Expr *LHSExpr, Expr *RHSExpr) {
StringLiteral* StrExpr = dyn_cast<StringLiteral>(LHSExpr->IgnoreImpCasts());
Expr* IndexExpr = RHSExpr;
if (!StrExpr) {
StrExpr = dyn_cast<StringLiteral>(RHSExpr->IgnoreImpCasts());
IndexExpr = LHSExpr;
}
bool IsStringPlusInt = StrExpr &&
IndexExpr->getType()->isIntegralOrUnscopedEnumerationType();
if (!IsStringPlusInt || IndexExpr->isValueDependent())
return;
SourceRange DiagRange(LHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
Self.Diag(OpLoc, diag::warn_string_plus_int)
<< DiagRange << IndexExpr->IgnoreImpCasts()->getType();
// Only print a fixit for "str" + int, not for int + "str".
if (IndexExpr == RHSExpr) {
SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getEndLoc());
Self.Diag(OpLoc, diag::note_string_plus_scalar_silence)
<< FixItHint::CreateInsertion(LHSExpr->getBeginLoc(), "&")
<< FixItHint::CreateReplacement(SourceRange(OpLoc), "[")
<< FixItHint::CreateInsertion(EndLoc, "]");
} else
Self.Diag(OpLoc, diag::note_string_plus_scalar_silence);
}
/// Emit a warning when adding a char literal to a string.
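///
/// For illustration:
/// \code
///   const char *p = "foo" + 'a'; // warn: adding 'char' does not append
///                                // to the string
/// \endcode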
static void diagnoseStringPlusChar(Sema &Self, SourceLocation OpLoc,
Expr *LHSExpr, Expr *RHSExpr) {
const Expr *StringRefExpr = LHSExpr;
const CharacterLiteral *CharExpr =
dyn_cast<CharacterLiteral>(RHSExpr->IgnoreImpCasts());
if (!CharExpr) {
CharExpr = dyn_cast<CharacterLiteral>(LHSExpr->IgnoreImpCasts());
StringRefExpr = RHSExpr;
}
if (!CharExpr || !StringRefExpr)
return;
const QualType StringType = StringRefExpr->getType();
// Return if not a PointerType.
if (!StringType->isAnyPointerType())
return;
// Return if not a CharacterType.
if (!StringType->getPointeeType()->isAnyCharacterType())
return;
ASTContext &Ctx = Self.getASTContext();
SourceRange DiagRange(LHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
const QualType CharType = CharExpr->getType();
if (!CharType->isAnyCharacterType() &&
CharType->isIntegerType() &&
llvm::isUIntN(Ctx.getCharWidth(), CharExpr->getValue())) {
Self.Diag(OpLoc, diag::warn_string_plus_char)
<< DiagRange << Ctx.CharTy;
} else {
Self.Diag(OpLoc, diag::warn_string_plus_char)
<< DiagRange << CharExpr->getType();
}
// Only print a fixit for str + char, not for char + str.
if (isa<CharacterLiteral>(RHSExpr->IgnoreImpCasts())) {
SourceLocation EndLoc = Self.getLocForEndOfToken(RHSExpr->getEndLoc());
Self.Diag(OpLoc, diag::note_string_plus_scalar_silence)
<< FixItHint::CreateInsertion(LHSExpr->getBeginLoc(), "&")
<< FixItHint::CreateReplacement(SourceRange(OpLoc), "[")
<< FixItHint::CreateInsertion(EndLoc, "]");
} else {
Self.Diag(OpLoc, diag::note_string_plus_scalar_silence);
}
}
/// Emit error when two pointers are incompatible.
static void diagnosePointerIncompatibility(Sema &S, SourceLocation Loc,
Expr *LHSExpr, Expr *RHSExpr) {
assert(LHSExpr->getType()->isAnyPointerType());
assert(RHSExpr->getType()->isAnyPointerType());
S.Diag(Loc, diag::err_typecheck_sub_ptr_compatible)
<< LHSExpr->getType() << RHSExpr->getType() << LHSExpr->getSourceRange()
<< RHSExpr->getSourceRange();
}
// C99 6.5.6
QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc, BinaryOperatorKind Opc,
QualType* CompLHSTy) {
checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
if (LHS.get()->getType()->isVectorType() ||
RHS.get()->getType()->isVectorType()) {
QualType compType =
CheckVectorOperands(LHS, RHS, Loc, CompLHSTy,
/*AllowBothBool*/ getLangOpts().AltiVec,
/*AllowBoolConversions*/ getLangOpts().ZVector,
/*AllowBooleanOperation*/ false,
/*ReportInvalid*/ true);
if (CompLHSTy) *CompLHSTy = compType;
return compType;
}
if (LHS.get()->getType()->isSveVLSBuiltinType() ||
RHS.get()->getType()->isSveVLSBuiltinType()) {
QualType compType =
CheckSizelessVectorOperands(LHS, RHS, Loc, CompLHSTy, ACK_Arithmetic);
if (CompLHSTy)
*CompLHSTy = compType;
return compType;
}
if (LHS.get()->getType()->isConstantMatrixType() ||
RHS.get()->getType()->isConstantMatrixType()) {
QualType compType =
CheckMatrixElementwiseOperands(LHS, RHS, Loc, CompLHSTy);
if (CompLHSTy)
*CompLHSTy = compType;
return compType;
}
QualType compType = UsualArithmeticConversions(
LHS, RHS, Loc, CompLHSTy ? ACK_CompAssign : ACK_Arithmetic);
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
// Diagnose "string literal" '+' int and string '+' "char literal".
if (Opc == BO_Add) {
diagnoseStringPlusInt(*this, Loc, LHS.get(), RHS.get());
diagnoseStringPlusChar(*this, Loc, LHS.get(), RHS.get());
}
// Handle the common case first (both operands are arithmetic).
if (!compType.isNull() && compType->isArithmeticType()) {
if (CompLHSTy) *CompLHSTy = compType;
return compType;
}
// Type-checking. Ultimately the pointer's going to be in PExp;
// note that we bias towards the LHS being the pointer.
Expr *PExp = LHS.get(), *IExp = RHS.get();
bool isObjCPointer;
if (PExp->getType()->isPointerType()) {
isObjCPointer = false;
} else if (PExp->getType()->isObjCObjectPointerType()) {
isObjCPointer = true;
} else {
std::swap(PExp, IExp);
if (PExp->getType()->isPointerType()) {
isObjCPointer = false;
} else if (PExp->getType()->isObjCObjectPointerType()) {
isObjCPointer = true;
} else {
return InvalidOperands(Loc, LHS, RHS);
}
}
assert(PExp->getType()->isAnyPointerType());
if (!IExp->getType()->isIntegerType())
return InvalidOperands(Loc, LHS, RHS);
// Adding to a null pointer results in undefined behavior.
if (PExp->IgnoreParenCasts()->isNullPointerConstant(
Context, Expr::NPC_ValueDependentIsNotNull)) {
// In C++ adding zero to a null pointer is defined.
Expr::EvalResult KnownVal;
if (!getLangOpts().CPlusPlus ||
(!IExp->isValueDependent() &&
(!IExp->EvaluateAsInt(KnownVal, Context) ||
KnownVal.Val.getInt() != 0))) {
// Check the conditions to see if this is the 'p = nullptr + n' idiom.
bool IsGNUIdiom = BinaryOperator::isNullPointerArithmeticExtension(
Context, BO_Add, PExp, IExp);
diagnoseArithmeticOnNullPointer(*this, Loc, PExp, IsGNUIdiom);
}
}
if (!checkArithmeticOpPointerOperand(*this, Loc, PExp))
return QualType();
if (isObjCPointer && checkArithmeticOnObjCPointer(*this, Loc, PExp))
return QualType();
// Arithmetic on label addresses is normally allowed, except when we add
// a ptrauth signature to the addresses.
if (isa<AddrLabelExpr>(PExp) && getLangOpts().PointerAuthIndirectGotos) {
Diag(Loc, diag::err_ptrauth_indirect_goto_addrlabel_arithmetic)
<< /*addition*/ 1;
return QualType();
}
// Check array bounds for pointer arithmetic.
CheckArrayAccess(PExp, IExp);
if (CompLHSTy) {
QualType LHSTy = Context.isPromotableBitField(LHS.get());
if (LHSTy.isNull()) {
LHSTy = LHS.get()->getType();
if (Context.isPromotableIntegerType(LHSTy))
LHSTy = Context.getPromotedIntegerType(LHSTy);
}
*CompLHSTy = LHSTy;
}
return PExp->getType();
}
// C99 6.5.6
QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc,
QualType* CompLHSTy) {
checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
if (LHS.get()->getType()->isVectorType() ||
RHS.get()->getType()->isVectorType()) {
QualType compType =
CheckVectorOperands(LHS, RHS, Loc, CompLHSTy,
/*AllowBothBool*/ getLangOpts().AltiVec,
/*AllowBoolConversions*/ getLangOpts().ZVector,
/*AllowBooleanOperation*/ false,
/*ReportInvalid*/ true);
if (CompLHSTy) *CompLHSTy = compType;
return compType;
}
if (LHS.get()->getType()->isSveVLSBuiltinType() ||
RHS.get()->getType()->isSveVLSBuiltinType()) {
QualType compType =
CheckSizelessVectorOperands(LHS, RHS, Loc, CompLHSTy, ACK_Arithmetic);
if (CompLHSTy)
*CompLHSTy = compType;
return compType;
}
if (LHS.get()->getType()->isConstantMatrixType() ||
RHS.get()->getType()->isConstantMatrixType()) {
QualType compType =
CheckMatrixElementwiseOperands(LHS, RHS, Loc, CompLHSTy);
if (CompLHSTy)
*CompLHSTy = compType;
return compType;
}
QualType compType = UsualArithmeticConversions(
LHS, RHS, Loc, CompLHSTy ? ACK_CompAssign : ACK_Arithmetic);
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
// Enforce type constraints: C99 6.5.6p3.
// Handle the common case first (both operands are arithmetic).
if (!compType.isNull() && compType->isArithmeticType()) {
if (CompLHSTy) *CompLHSTy = compType;
return compType;
}
// Either ptr - int or ptr - ptr.
if (LHS.get()->getType()->isAnyPointerType()) {
QualType lpointee = LHS.get()->getType()->getPointeeType();
// Diagnose bad cases where we step over interface counts.
if (LHS.get()->getType()->isObjCObjectPointerType() &&
checkArithmeticOnObjCPointer(*this, Loc, LHS.get()))
return QualType();
// Arithmetic on label addresses is normally allowed, except when we add
// a ptrauth signature to the addresses.
if (isa<AddrLabelExpr>(LHS.get()) &&
getLangOpts().PointerAuthIndirectGotos) {
Diag(Loc, diag::err_ptrauth_indirect_goto_addrlabel_arithmetic)
<< /*subtraction*/ 0;
return QualType();
}
// The result type of a pointer-int computation is the pointer type.
if (RHS.get()->getType()->isIntegerType()) {
// Subtracting from a null pointer should produce a warning.
// The last argument to the diagnose call says this doesn't match the
// GNU int-to-pointer idiom.
if (LHS.get()->IgnoreParenCasts()->isNullPointerConstant(Context,
Expr::NPC_ValueDependentIsNotNull)) {
// In C++ adding zero to a null pointer is defined.
Expr::EvalResult KnownVal;
if (!getLangOpts().CPlusPlus ||
(!RHS.get()->isValueDependent() &&
(!RHS.get()->EvaluateAsInt(KnownVal, Context) ||
KnownVal.Val.getInt() != 0))) {
diagnoseArithmeticOnNullPointer(*this, Loc, LHS.get(), false);
}
}
if (!checkArithmeticOpPointerOperand(*this, Loc, LHS.get()))
return QualType();
// Check array bounds for pointer arithmetic.
CheckArrayAccess(LHS.get(), RHS.get(), /*ArraySubscriptExpr*/nullptr,
/*AllowOnePastEnd*/true, /*IndexNegated*/true);
if (CompLHSTy) *CompLHSTy = LHS.get()->getType();
return LHS.get()->getType();
}
// Handle pointer-pointer subtractions.
if (const PointerType *RHSPTy
= RHS.get()->getType()->getAs<PointerType>()) {
QualType rpointee = RHSPTy->getPointeeType();
if (getLangOpts().CPlusPlus) {
// Pointee types must be the same: C++ [expr.add]
if (!Context.hasSameUnqualifiedType(lpointee, rpointee)) {
diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get());
}
} else {
// Pointee types must be compatible C99 6.5.6p3
if (!Context.typesAreCompatible(
Context.getCanonicalType(lpointee).getUnqualifiedType(),
Context.getCanonicalType(rpointee).getUnqualifiedType())) {
diagnosePointerIncompatibility(*this, Loc, LHS.get(), RHS.get());
return QualType();
}
}
if (!checkArithmeticBinOpPointerOperands(*this, Loc,
LHS.get(), RHS.get()))
return QualType();
bool LHSIsNullPtr = LHS.get()->IgnoreParenCasts()->isNullPointerConstant(
Context, Expr::NPC_ValueDependentIsNotNull);
bool RHSIsNullPtr = RHS.get()->IgnoreParenCasts()->isNullPointerConstant(
Context, Expr::NPC_ValueDependentIsNotNull);
// Subtracting a null pointer, or subtracting from one, is suspect.
if (LHSIsNullPtr)
diagnoseSubtractionOnNullPointer(*this, Loc, LHS.get(), RHSIsNullPtr);
if (RHSIsNullPtr)
diagnoseSubtractionOnNullPointer(*this, Loc, RHS.get(), LHSIsNullPtr);
// The pointee type may have zero size. As an extension, a structure or
// union may have zero size or an array may have zero length. In this
// case subtraction does not make sense.
if (!rpointee->isVoidType() && !rpointee->isFunctionType()) {
CharUnits ElementSize = Context.getTypeSizeInChars(rpointee);
if (ElementSize.isZero()) {
Diag(Loc,diag::warn_sub_ptr_zero_size_types)
<< rpointee.getUnqualifiedType()
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
}
}
if (CompLHSTy) *CompLHSTy = LHS.get()->getType();
return Context.getPointerDiffType();
}
}
return InvalidOperands(Loc, LHS, RHS);
}
static bool isScopedEnumerationType(QualType T) {
if (const EnumType *ET = T->getAs<EnumType>())
return ET->getDecl()->isScoped();
return false;
}
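// For illustration, the shift diagnostics produced below, assuming a 32-bit
// 'int' and a pre-C++20 dialect for the overflow case:
//
//   x << -1; // warn: shift count is negative
//   x << 32; // warn: shift count >= width of the type
//   1 << 31; // warn: signed shift result sets the sign bit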
static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc, BinaryOperatorKind Opc,
QualType LHSType) {
// OpenCL 6.3j: shift values are effectively taken modulo the word size of
// the LHS (i.e., the behavior is more defined), so skip the remaining
// warnings, as we don't want to modify values within Sema.
if (S.getLangOpts().OpenCL)
return;
// Check right/shifter operand
Expr::EvalResult RHSResult;
if (RHS.get()->isValueDependent() ||
!RHS.get()->EvaluateAsInt(RHSResult, S.Context))
return;
llvm::APSInt Right = RHSResult.Val.getInt();
if (Right.isNegative()) {
S.DiagRuntimeBehavior(Loc, RHS.get(),
S.PDiag(diag::warn_shift_negative)
<< RHS.get()->getSourceRange());
return;
}
QualType LHSExprType = LHS.get()->getType();
uint64_t LeftSize = S.Context.getTypeSize(LHSExprType);
if (LHSExprType->isBitIntType())
LeftSize = S.Context.getIntWidth(LHSExprType);
else if (LHSExprType->isFixedPointType()) {
auto FXSema = S.Context.getFixedPointSemantics(LHSExprType);
LeftSize = FXSema.getWidth() - (unsigned)FXSema.hasUnsignedPadding();
}
if (Right.uge(LeftSize)) {
S.DiagRuntimeBehavior(Loc, RHS.get(),
S.PDiag(diag::warn_shift_gt_typewidth)
<< RHS.get()->getSourceRange());
return;
}
// FIXME: We probably need to handle fixed point types specially here.
if (Opc != BO_Shl || LHSExprType->isFixedPointType())
return;
// When left shifting an ICE which is signed, we can check for overflow which
// according to C++ standards prior to C++2a has undefined behavior
// ([expr.shift] 5.8/2). Unsigned integers have defined behavior modulo one
// more than the maximum value representable in the result type, so never
// warn for those. (FIXME: Unsigned left-shift overflow in a constant
// expression is still probably a bug.)
Expr::EvalResult LHSResult;
if (LHS.get()->isValueDependent() ||
LHSType->hasUnsignedIntegerRepresentation() ||
!LHS.get()->EvaluateAsInt(LHSResult, S.Context))
return;
llvm::APSInt Left = LHSResult.Val.getInt();
// If signed overflow is defined, don't warn: the remaining diagnostics
// will not trigger because the behavior is defined.
// Also don't warn in C++20 mode (and newer), as signed left shifts
// always wrap and never overflow.
if (S.getLangOpts().isSignedOverflowDefined() || S.getLangOpts().CPlusPlus20)
return;
// If the LHS does not have a non-negative value, then the
// behavior is undefined before C++2a. Warn about it.
if (Left.isNegative()) {
S.DiagRuntimeBehavior(Loc, LHS.get(),
S.PDiag(diag::warn_shift_lhs_negative)
<< LHS.get()->getSourceRange());
return;
}
llvm::APInt ResultBits =
static_cast<llvm::APInt &>(Right) + Left.getSignificantBits();
if (ResultBits.ule(LeftSize))
return;
llvm::APSInt Result = Left.extend(ResultBits.getLimitedValue());
Result = Result.shl(Right);
// Print the bit representation of the signed integer as an unsigned
// hexadecimal number.
SmallString<40> HexResult;
Result.toString(HexResult, 16, /*Signed =*/false, /*Literal =*/true);
// If we are only missing a sign bit, this is less likely to result in actual
// bugs -- if the result is cast back to an unsigned type, it will have the
// expected value. Thus we place this behind a different warning that can be
// turned off separately if needed.
if (ResultBits - 1 == LeftSize) {
S.Diag(Loc, diag::warn_shift_result_sets_sign_bit)
<< HexResult << LHSType
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return;
}
S.Diag(Loc, diag::warn_shift_result_gt_typewidth)
<< HexResult.str() << Result.getSignificantBits() << LHSType
<< Left.getBitWidth() << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
}
/// Return the resulting type when a vector is shifted
/// by a scalar or vector shift amount.
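///
/// For illustration ('v4si' is a hypothetical GCC/ext vector type):
/// \code
///   typedef int v4si __attribute__((vector_size(16)));
///   v4si v;
///   v << 3; // scalar RHS is splat to 'v4si'; shift applied per element
///   3 << v; // scalar LHS is splat likewise (rejected under OpenCL/ZVector)
/// \endcode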
static QualType checkVectorShift(Sema &S, ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc, bool IsCompAssign) {
// OpenCL v1.1 s6.3.j says RHS can be a vector only if LHS is a vector.
if ((S.LangOpts.OpenCL || S.LangOpts.ZVector) &&
!LHS.get()->getType()->isVectorType()) {
S.Diag(Loc, diag::err_shift_rhs_only_vector)
<< RHS.get()->getType() << LHS.get()->getType()
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return QualType();
}
if (!IsCompAssign) {
LHS = S.UsualUnaryConversions(LHS.get());
if (LHS.isInvalid()) return QualType();
}
RHS = S.UsualUnaryConversions(RHS.get());
if (RHS.isInvalid()) return QualType();
QualType LHSType = LHS.get()->getType();
// Note that LHS might be a scalar because this routine is called not only
// in the OpenCL case.
const VectorType *LHSVecTy = LHSType->getAs<VectorType>();
QualType LHSEleType = LHSVecTy ? LHSVecTy->getElementType() : LHSType;
// Note that RHS might not be a vector.
QualType RHSType = RHS.get()->getType();
const VectorType *RHSVecTy = RHSType->getAs<VectorType>();
QualType RHSEleType = RHSVecTy ? RHSVecTy->getElementType() : RHSType;
// Do not allow shifts for boolean vectors.
if ((LHSVecTy && LHSVecTy->isExtVectorBoolType()) ||
(RHSVecTy && RHSVecTy->isExtVectorBoolType())) {
S.Diag(Loc, diag::err_typecheck_invalid_operands)
<< LHS.get()->getType() << RHS.get()->getType()
<< LHS.get()->getSourceRange();
return QualType();
}
// The operands need to be integers.
if (!LHSEleType->isIntegerType()) {
S.Diag(Loc, diag::err_typecheck_expect_int)
<< LHS.get()->getType() << LHS.get()->getSourceRange();
return QualType();
}
if (!RHSEleType->isIntegerType()) {
S.Diag(Loc, diag::err_typecheck_expect_int)
<< RHS.get()->getType() << RHS.get()->getSourceRange();
return QualType();
}
if (!LHSVecTy) {
assert(RHSVecTy);
if (IsCompAssign)
return RHSType;
if (LHSEleType != RHSEleType) {
LHS = S.ImpCastExprToType(LHS.get(),RHSEleType, CK_IntegralCast);
LHSEleType = RHSEleType;
}
QualType VecTy =
S.Context.getExtVectorType(LHSEleType, RHSVecTy->getNumElements());
LHS = S.ImpCastExprToType(LHS.get(), VecTy, CK_VectorSplat);
LHSType = VecTy;
} else if (RHSVecTy) {
// OpenCL v1.1 s6.3.j says that for vector types, the operators
// are applied component-wise. So if RHS is a vector, then ensure
// that the number of elements is the same as LHS...
if (RHSVecTy->getNumElements() != LHSVecTy->getNumElements()) {
S.Diag(Loc, diag::err_typecheck_vector_lengths_not_equal)
<< LHS.get()->getType() << RHS.get()->getType()
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
return QualType();
}
if (!S.LangOpts.OpenCL && !S.LangOpts.ZVector) {
const BuiltinType *LHSBT = LHSEleType->getAs<clang::BuiltinType>();
const BuiltinType *RHSBT = RHSEleType->getAs<clang::BuiltinType>();
if (LHSBT != RHSBT &&
S.Context.getTypeSize(LHSBT) != S.Context.getTypeSize(RHSBT)) {
S.Diag(Loc, diag::warn_typecheck_vector_element_sizes_not_equal)
<< LHS.get()->getType() << RHS.get()->getType()
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
}
}
} else {
// ...else expand RHS to match the number of elements in LHS.
QualType VecTy =
S.Context.getExtVectorType(RHSEleType, LHSVecTy->getNumElements());
RHS = S.ImpCastExprToType(RHS.get(), VecTy, CK_VectorSplat);
}
return LHSType;
}
static QualType checkSizelessVectorShift(Sema &S, ExprResult &LHS,
ExprResult &RHS, SourceLocation Loc,
bool IsCompAssign) {
if (!IsCompAssign) {
LHS = S.UsualUnaryConversions(LHS.get());
if (LHS.isInvalid())
return QualType();
}
RHS = S.UsualUnaryConversions(RHS.get());
if (RHS.isInvalid())
return QualType();
QualType LHSType = LHS.get()->getType();
const BuiltinType *LHSBuiltinTy = LHSType->castAs<BuiltinType>();
QualType LHSEleType = LHSType->isSveVLSBuiltinType()
? LHSBuiltinTy->getSveEltType(S.getASTContext())
: LHSType;
// Note that RHS might not be a vector
QualType RHSType = RHS.get()->getType();
const BuiltinType *RHSBuiltinTy = RHSType->castAs<BuiltinType>();
QualType RHSEleType = RHSType->isSveVLSBuiltinType()
? RHSBuiltinTy->getSveEltType(S.getASTContext())
: RHSType;
if ((LHSBuiltinTy && LHSBuiltinTy->isSVEBool()) ||
(RHSBuiltinTy && RHSBuiltinTy->isSVEBool())) {
S.Diag(Loc, diag::err_typecheck_invalid_operands)
<< LHSType << RHSType << LHS.get()->getSourceRange();
return QualType();
}
if (!LHSEleType->isIntegerType()) {
S.Diag(Loc, diag::err_typecheck_expect_int)
<< LHS.get()->getType() << LHS.get()->getSourceRange();
return QualType();
}
if (!RHSEleType->isIntegerType()) {
S.Diag(Loc, diag::err_typecheck_expect_int)
<< RHS.get()->getType() << RHS.get()->getSourceRange();
return QualType();
}
if (LHSType->isSveVLSBuiltinType() && RHSType->isSveVLSBuiltinType() &&
(S.Context.getBuiltinVectorTypeInfo(LHSBuiltinTy).EC !=
S.Context.getBuiltinVectorTypeInfo(RHSBuiltinTy).EC)) {
S.Diag(Loc, diag::err_typecheck_invalid_operands)
<< LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
if (!LHSType->isSveVLSBuiltinType()) {
assert(RHSType->isSveVLSBuiltinType());
if (IsCompAssign)
return RHSType;
if (LHSEleType != RHSEleType) {
LHS = S.ImpCastExprToType(LHS.get(), RHSEleType, clang::CK_IntegralCast);
LHSEleType = RHSEleType;
}
const llvm::ElementCount VecSize =
S.Context.getBuiltinVectorTypeInfo(RHSBuiltinTy).EC;
QualType VecTy =
S.Context.getScalableVectorType(LHSEleType, VecSize.getKnownMinValue());
LHS = S.ImpCastExprToType(LHS.get(), VecTy, clang::CK_VectorSplat);
LHSType = VecTy;
} else if (RHSBuiltinTy && RHSBuiltinTy->isSveVLSBuiltinType()) {
if (S.Context.getTypeSize(RHSBuiltinTy) !=
S.Context.getTypeSize(LHSBuiltinTy)) {
S.Diag(Loc, diag::err_typecheck_vector_lengths_not_equal)
<< LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
return QualType();
}
} else {
const llvm::ElementCount VecSize =
S.Context.getBuiltinVectorTypeInfo(LHSBuiltinTy).EC;
if (LHSEleType != RHSEleType) {
RHS = S.ImpCastExprToType(RHS.get(), LHSEleType, clang::CK_IntegralCast);
RHSEleType = LHSEleType;
}
QualType VecTy =
S.Context.getScalableVectorType(RHSEleType, VecSize.getKnownMinValue());
RHS = S.ImpCastExprToType(RHS.get(), VecTy, CK_VectorSplat);
}
return LHSType;
}
// C99 6.5.7
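// For illustration: shifts promote each operand separately, and the result
// takes the type of the promoted left operand:
//
//   short s;    s << 1;  // 's' promotes to 'int'; the result type is 'int'
//   unsigned u; u << 1L; // result type stays 'unsigned', not 'long'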
QualType Sema::CheckShiftOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc, BinaryOperatorKind Opc,
bool IsCompAssign) {
checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
// Vector shifts promote their scalar inputs to vector type.
if (LHS.get()->getType()->isVectorType() ||
RHS.get()->getType()->isVectorType()) {
if (LangOpts.ZVector) {
// The shift operators for the z vector extensions work basically
// like general shifts, except that neither the LHS nor the RHS is
// allowed to be a "vector bool".
if (auto LHSVecType = LHS.get()->getType()->getAs<VectorType>())
if (LHSVecType->getVectorKind() == VectorKind::AltiVecBool)
return InvalidOperands(Loc, LHS, RHS);
if (auto RHSVecType = RHS.get()->getType()->getAs<VectorType>())
if (RHSVecType->getVectorKind() == VectorKind::AltiVecBool)
return InvalidOperands(Loc, LHS, RHS);
}
return checkVectorShift(*this, LHS, RHS, Loc, IsCompAssign);
}
if (LHS.get()->getType()->isSveVLSBuiltinType() ||
RHS.get()->getType()->isSveVLSBuiltinType())
return checkSizelessVectorShift(*this, LHS, RHS, Loc, IsCompAssign);
// Shifts don't perform usual arithmetic conversions, they just do integer
// promotions on each operand. C99 6.5.7p3
// For the LHS, do the usual unary conversions, but then undo them
// if this is a compound assignment.
ExprResult OldLHS = LHS;
LHS = UsualUnaryConversions(LHS.get());
if (LHS.isInvalid())
return QualType();
QualType LHSType = LHS.get()->getType();
if (IsCompAssign) LHS = OldLHS;
// The RHS is simpler.
RHS = UsualUnaryConversions(RHS.get());
if (RHS.isInvalid())
return QualType();
QualType RHSType = RHS.get()->getType();
// C99 6.5.7p2: Each of the operands shall have integer type.
// Embedded-C 4.1.6.2.2: The LHS may also be fixed-point.
if ((!LHSType->isFixedPointOrIntegerType() &&
!LHSType->hasIntegerRepresentation()) ||
!RHSType->hasIntegerRepresentation())
return InvalidOperands(Loc, LHS, RHS);
// C++0x: Don't allow scoped enums. FIXME: Use something better than
// hasIntegerRepresentation() above instead of this.
if (isScopedEnumerationType(LHSType) ||
isScopedEnumerationType(RHSType)) {
return InvalidOperands(Loc, LHS, RHS);
}
DiagnoseBadShiftValues(*this, LHS, RHS, Loc, Opc, LHSType);
// "The type of the result is that of the promoted left operand."
return LHSType;
}
/// Diagnose bad pointer comparisons.
static void diagnoseDistinctPointerComparison(Sema &S, SourceLocation Loc,
ExprResult &LHS, ExprResult &RHS,
bool IsError) {
S.Diag(Loc, IsError ? diag::err_typecheck_comparison_of_distinct_pointers
: diag::ext_typecheck_comparison_of_distinct_pointers)
<< LHS.get()->getType() << RHS.get()->getType()
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
}
/// Returns false if the pointers are converted to a composite type,
/// true otherwise.
static bool convertPointersToCompositeType(Sema &S, SourceLocation Loc,
ExprResult &LHS, ExprResult &RHS) {
// C++ [expr.rel]p2:
// [...] Pointer conversions (4.10) and qualification
// conversions (4.4) are performed on pointer operands (or on
// a pointer operand and a null pointer constant) to bring
// them to their composite pointer type. [...]
//
// C++ [expr.eq]p1 uses the same notion for (in)equality
// comparisons of pointers.
QualType LHSType = LHS.get()->getType();
QualType RHSType = RHS.get()->getType();
assert(LHSType->isPointerType() || RHSType->isPointerType() ||
LHSType->isMemberPointerType() || RHSType->isMemberPointerType());
QualType T = S.FindCompositePointerType(Loc, LHS, RHS);
if (T.isNull()) {
if ((LHSType->isAnyPointerType() || LHSType->isMemberPointerType()) &&
(RHSType->isAnyPointerType() || RHSType->isMemberPointerType()))
diagnoseDistinctPointerComparison(S, Loc, LHS, RHS, /*isError*/true);
else
S.InvalidOperands(Loc, LHS, RHS);
return true;
}
return false;
}
static void diagnoseFunctionPointerToVoidComparison(Sema &S, SourceLocation Loc,
ExprResult &LHS,
ExprResult &RHS,
bool IsError) {
S.Diag(Loc, IsError ? diag::err_typecheck_comparison_of_fptr_to_void
: diag::ext_typecheck_comparison_of_fptr_to_void)
<< LHS.get()->getType() << RHS.get()->getType()
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
}
static bool isObjCObjectLiteral(ExprResult &E) {
switch (E.get()->IgnoreParenImpCasts()->getStmtClass()) {
case Stmt::ObjCArrayLiteralClass:
case Stmt::ObjCDictionaryLiteralClass:
case Stmt::ObjCStringLiteralClass:
case Stmt::ObjCBoxedExprClass:
return true;
default:
// Note that ObjCBoolLiteral is NOT an object literal!
return false;
}
}
static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) {
const ObjCObjectPointerType *Type =
LHS->getType()->getAs<ObjCObjectPointerType>();
// If this is not actually an Objective-C object, bail out.
if (!Type)
return false;
// Get the LHS object's interface type.
QualType InterfaceType = Type->getPointeeType();
// If the RHS isn't an Objective-C object, bail out.
if (!RHS->getType()->isObjCObjectPointerType())
return false;
// Try to find the -isEqual: method.
Selector IsEqualSel = S.ObjC().NSAPIObj->getIsEqualSelector();
ObjCMethodDecl *Method =
S.ObjC().LookupMethodInObjectType(IsEqualSel, InterfaceType,
/*IsInstance=*/true);
if (!Method) {
if (Type->isObjCIdType()) {
// For 'id', just check the global pool.
Method =
S.ObjC().LookupInstanceMethodInGlobalPool(IsEqualSel, SourceRange(),
/*receiverId=*/true);
} else {
// Check protocols.
Method = S.ObjC().LookupMethodInQualifiedType(IsEqualSel, Type,
/*IsInstance=*/true);
}
}
if (!Method)
return false;
QualType T = Method->parameters()[0]->getType();
if (!T->isObjCObjectPointerType())
return false;
QualType R = Method->getReturnType();
if (!R->isScalarType())
return false;
return true;
}
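// For illustration (Objective-C; 's' stands for a hypothetical NSString *):
//
//   if (s == @"foo") ... // warn: this compares pointers; the fix-it
//                        // suggests [s isEqual:@"foo"]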
static void diagnoseObjCLiteralComparison(Sema &S, SourceLocation Loc,
ExprResult &LHS, ExprResult &RHS,
BinaryOperator::Opcode Opc){
Expr *Literal;
Expr *Other;
if (isObjCObjectLiteral(LHS)) {
Literal = LHS.get();
Other = RHS.get();
} else {
Literal = RHS.get();
Other = LHS.get();
}
// Don't warn on comparisons against nil.
Other = Other->IgnoreParenCasts();
if (Other->isNullPointerConstant(S.getASTContext(),
Expr::NPC_ValueDependentIsNotNull))
return;
// This should be kept in sync with warn_objc_literal_comparison.
// LK_String should always be after the other literals, since it has its own
// warning flag.
SemaObjC::ObjCLiteralKind LiteralKind = S.ObjC().CheckLiteralKind(Literal);
assert(LiteralKind != SemaObjC::LK_Block);
if (LiteralKind == SemaObjC::LK_None) {
llvm_unreachable("Unknown Objective-C object literal kind");
}
if (LiteralKind == SemaObjC::LK_String)
S.Diag(Loc, diag::warn_objc_string_literal_comparison)
<< Literal->getSourceRange();
else
S.Diag(Loc, diag::warn_objc_literal_comparison)
<< LiteralKind << Literal->getSourceRange();
if (BinaryOperator::isEqualityOp(Opc) &&
hasIsEqualMethod(S, LHS.get(), RHS.get())) {
SourceLocation Start = LHS.get()->getBeginLoc();
SourceLocation End = S.getLocForEndOfToken(RHS.get()->getEndLoc());
CharSourceRange OpRange =
CharSourceRange::getCharRange(Loc, S.getLocForEndOfToken(Loc));
S.Diag(Loc, diag::note_objc_literal_comparison_isequal)
<< FixItHint::CreateInsertion(Start, Opc == BO_EQ ? "[" : "![")
<< FixItHint::CreateReplacement(OpRange, " isEqual:")
<< FixItHint::CreateInsertion(End, "]");
}
}
/// Warns on !x < y, !x & y where !(x < y), !(x & y) was probably intended.
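///
/// For illustration:
/// \code
///   if (!x < y) ... // warn; fix-its offer both !(x < y) and (!x) < y
/// \endcode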
static void diagnoseLogicalNotOnLHSofCheck(Sema &S, ExprResult &LHS,
ExprResult &RHS, SourceLocation Loc,
BinaryOperatorKind Opc) {
// Check that left hand side is !something.
UnaryOperator *UO = dyn_cast<UnaryOperator>(LHS.get()->IgnoreImpCasts());
if (!UO || UO->getOpcode() != UO_LNot) return;
// Only check if the right-hand side is a non-bool arithmetic type.
if (RHS.get()->isKnownToHaveBooleanValue()) return;
// Make sure that the something in !something is not bool.
Expr *SubExpr = UO->getSubExpr()->IgnoreImpCasts();
if (SubExpr->isKnownToHaveBooleanValue()) return;
// Emit warning.
bool IsBitwiseOp = Opc == BO_And || Opc == BO_Or || Opc == BO_Xor;
S.Diag(UO->getOperatorLoc(), diag::warn_logical_not_on_lhs_of_check)
<< Loc << IsBitwiseOp;
// The first note suggests !(x < y)
SourceLocation FirstOpen = SubExpr->getBeginLoc();
SourceLocation FirstClose = RHS.get()->getEndLoc();
FirstClose = S.getLocForEndOfToken(FirstClose);
if (FirstClose.isInvalid())
FirstOpen = SourceLocation();
S.Diag(UO->getOperatorLoc(), diag::note_logical_not_fix)
<< IsBitwiseOp
<< FixItHint::CreateInsertion(FirstOpen, "(")
<< FixItHint::CreateInsertion(FirstClose, ")");
// The second note suggests (!x) < y
SourceLocation SecondOpen = LHS.get()->getBeginLoc();
SourceLocation SecondClose = LHS.get()->getEndLoc();
SecondClose = S.getLocForEndOfToken(SecondClose);
if (SecondClose.isInvalid())
SecondOpen = SourceLocation();
S.Diag(UO->getOperatorLoc(), diag::note_logical_not_silence_with_parens)
<< FixItHint::CreateInsertion(SecondOpen, "(")
<< FixItHint::CreateInsertion(SecondClose, ")");
}
// Returns true if E refers to a non-weak array.
static bool checkForArray(const Expr *E) {
const ValueDecl *D = nullptr;
if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E)) {
D = DR->getDecl();
} else if (const MemberExpr *Mem = dyn_cast<MemberExpr>(E)) {
if (Mem->isImplicitAccess())
D = Mem->getMemberDecl();
}
if (!D)
return false;
return D->getType()->isArrayType() && !D->isWeak();
}
/// Diagnose some forms of syntactically-obvious tautological comparison.
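///
/// For illustration:
/// \code
///   if (x == x) ... // warn: always true
///   int a[4], b[4];
///   if (a == b) ... // warn: array comparison, always false
/// \endcode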
static void diagnoseTautologicalComparison(Sema &S, SourceLocation Loc,
Expr *LHS, Expr *RHS,
BinaryOperatorKind Opc) {
Expr *LHSStripped = LHS->IgnoreParenImpCasts();
Expr *RHSStripped = RHS->IgnoreParenImpCasts();
QualType LHSType = LHS->getType();
QualType RHSType = RHS->getType();
if (LHSType->hasFloatingRepresentation() ||
(LHSType->isBlockPointerType() && !BinaryOperator::isEqualityOp(Opc)) ||
S.inTemplateInstantiation())
return;
// WebAssembly tables cannot be compared, so we shouldn't emit
// tautological diagnostics for them.
if (LHSType->isWebAssemblyTableType() || RHSType->isWebAssemblyTableType())
return;
// Comparisons between two array types are ill-formed for operator<=>, so
// we shouldn't emit any additional warnings about it.
if (Opc == BO_Cmp && LHSType->isArrayType() && RHSType->isArrayType())
return;
// For non-floating point types, check for self-comparisons of the form
// x == x, x != x, x < x, etc. These always evaluate to a constant, and
// often indicate logic errors in the program.
//
// NOTE: Don't warn about comparison expressions resulting from macro
// expansion. Also don't warn about comparisons which are only self
// comparisons within a template instantiation. The warnings should catch
// obvious cases in the definition of the template anyway. The idea is to
// warn when the typed comparison operator will always evaluate to the same
// result.
// Used for indexing into %select in warn_comparison_always
enum {
AlwaysConstant,
AlwaysTrue,
AlwaysFalse,
AlwaysEqual, // std::strong_ordering::equal from operator<=>
};
// C++2a [depr.array.comp]:
// Equality and relational comparisons ([expr.eq], [expr.rel]) between two
// operands of array type are deprecated.
if (S.getLangOpts().CPlusPlus20 && LHSStripped->getType()->isArrayType() &&
RHSStripped->getType()->isArrayType()) {
S.Diag(Loc, diag::warn_depr_array_comparison)
<< LHS->getSourceRange() << RHS->getSourceRange()
<< LHSStripped->getType() << RHSStripped->getType();
// Carry on to produce the tautological comparison warning when the usual
// conditions hold: the expression is potentially evaluated, the array
// resolves to a non-weak declaration, and so on.
}
if (!LHS->getBeginLoc().isMacroID() && !RHS->getBeginLoc().isMacroID()) {
if (Expr::isSameComparisonOperand(LHS, RHS)) {
unsigned Result;
switch (Opc) {
case BO_EQ:
case BO_LE:
case BO_GE:
Result = AlwaysTrue;
break;
case BO_NE:
case BO_LT:
case BO_GT:
Result = AlwaysFalse;
break;
case BO_Cmp:
Result = AlwaysEqual;
break;
default:
Result = AlwaysConstant;
break;
}
S.DiagRuntimeBehavior(Loc, nullptr,
S.PDiag(diag::warn_comparison_always)
<< 0 /*self-comparison*/
<< Result);
} else if (checkForArray(LHSStripped) && checkForArray(RHSStripped)) {
// What is it always going to evaluate to?
unsigned Result;
switch (Opc) {
case BO_EQ: // e.g. array1 == array2
Result = AlwaysFalse;
break;
case BO_NE: // e.g. array1 != array2
Result = AlwaysTrue;
break;
default: // e.g. array1 <= array2
// The best we can say is 'a constant'
Result = AlwaysConstant;
break;
}
S.DiagRuntimeBehavior(Loc, nullptr,
S.PDiag(diag::warn_comparison_always)
<< 1 /*array comparison*/
<< Result);
}
}
if (isa<CastExpr>(LHSStripped))
LHSStripped = LHSStripped->IgnoreParenCasts();
if (isa<CastExpr>(RHSStripped))
RHSStripped = RHSStripped->IgnoreParenCasts();
// Warn about comparisons against a string constant (unless the other
// operand is null); the user probably wants a string comparison function.
Expr *LiteralString = nullptr;
Expr *LiteralStringStripped = nullptr;
if ((isa<StringLiteral>(LHSStripped) || isa<ObjCEncodeExpr>(LHSStripped)) &&
!RHSStripped->isNullPointerConstant(S.Context,
Expr::NPC_ValueDependentIsNull)) {
LiteralString = LHS;
LiteralStringStripped = LHSStripped;
} else if ((isa<StringLiteral>(RHSStripped) ||
isa<ObjCEncodeExpr>(RHSStripped)) &&
!LHSStripped->isNullPointerConstant(S.Context,
Expr::NPC_ValueDependentIsNull)) {
LiteralString = RHS;
LiteralStringStripped = RHSStripped;
}
if (LiteralString) {
S.DiagRuntimeBehavior(Loc, nullptr,
S.PDiag(diag::warn_stringcompare)
<< isa<ObjCEncodeExpr>(LiteralStringStripped)
<< LiteralString->getSourceRange());
}
}
static ImplicitConversionKind castKindToImplicitConversionKind(CastKind CK) {
switch (CK) {
default: {
#ifndef NDEBUG
llvm::errs() << "unhandled cast kind: " << CastExpr::getCastKindName(CK)
<< "\n";
#endif
llvm_unreachable("unhandled cast kind");
}
case CK_UserDefinedConversion:
return ICK_Identity;
case CK_LValueToRValue:
return ICK_Lvalue_To_Rvalue;
case CK_ArrayToPointerDecay:
return ICK_Array_To_Pointer;
case CK_FunctionToPointerDecay:
return ICK_Function_To_Pointer;
case CK_IntegralCast:
return ICK_Integral_Conversion;
case CK_FloatingCast:
return ICK_Floating_Conversion;
case CK_IntegralToFloating:
case CK_FloatingToIntegral:
return ICK_Floating_Integral;
case CK_IntegralComplexCast:
case CK_FloatingComplexCast:
case CK_FloatingComplexToIntegralComplex:
case CK_IntegralComplexToFloatingComplex:
return ICK_Complex_Conversion;
case CK_FloatingComplexToReal:
case CK_FloatingRealToComplex:
case CK_IntegralComplexToReal:
case CK_IntegralRealToComplex:
return ICK_Complex_Real;
case CK_HLSLArrayRValue:
return ICK_HLSL_Array_RValue;
}
}
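// C++2a [expr.spaceship]p4: a builtin <=> is ill-formed if bringing the
// operands to their common type requires a narrowing conversion (narrowing
// from an integral to a floating-point type is permitted). For example, with
// `int i; unsigned u;`, `i <=> u` is rejected because the required
// int -> unsigned conversion is narrowing.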
static bool checkThreeWayNarrowingConversion(Sema &S, QualType ToType, Expr *E,
QualType FromType,
SourceLocation Loc) {
// Check for a narrowing implicit conversion.
StandardConversionSequence SCS;
SCS.setAsIdentityConversion();
SCS.setToType(0, FromType);
SCS.setToType(1, ToType);
if (const auto *ICE = dyn_cast<ImplicitCastExpr>(E))
SCS.Second = castKindToImplicitConversionKind(ICE->getCastKind());
APValue PreNarrowingValue;
QualType PreNarrowingType;
switch (SCS.getNarrowingKind(S.Context, E, PreNarrowingValue,
PreNarrowingType,
/*IgnoreFloatToIntegralConversion*/ true)) {
case NK_Dependent_Narrowing:
// Implicit conversion to a narrower type, but the expression is
// value-dependent so we can't tell whether it's actually narrowing.
case NK_Not_Narrowing:
return false;
case NK_Constant_Narrowing:
// Implicit conversion to a narrower type, and the value is a constant
// expression that cannot be exactly represented in the narrower type.
S.Diag(E->getBeginLoc(), diag::err_spaceship_argument_narrowing)
<< /*Constant*/ 1
<< PreNarrowingValue.getAsString(S.Context, PreNarrowingType) << ToType;
return true;
case NK_Variable_Narrowing:
// Implicit conversion to a narrower type, and the value is not a constant
// expression.
case NK_Type_Narrowing:
S.Diag(E->getBeginLoc(), diag::err_spaceship_argument_narrowing)
<< /*Constant*/ 0 << FromType << ToType;
// TODO: It's not a constant expression, but what if the user intended it
// to be? Can we produce notes to help them figure out why it isn't?
return true;
}
llvm_unreachable("unhandled case in switch");
}
static QualType checkArithmeticOrEnumeralThreeWayCompare(Sema &S,
ExprResult &LHS,
ExprResult &RHS,
SourceLocation Loc) {
QualType LHSType = LHS.get()->getType();
QualType RHSType = RHS.get()->getType();
// Dig out the original argument type and expression before implicit casts
// were applied. These are the types/expressions we need to check the
// [expr.spaceship] requirements against.
ExprResult LHSStripped = LHS.get()->IgnoreParenImpCasts();
ExprResult RHSStripped = RHS.get()->IgnoreParenImpCasts();
QualType LHSStrippedType = LHSStripped.get()->getType();
QualType RHSStrippedType = RHSStripped.get()->getType();
// C++2a [expr.spaceship]p3: If one of the operands is of type bool and the
// other is not, the program is ill-formed.
if (LHSStrippedType->isBooleanType() != RHSStrippedType->isBooleanType()) {
S.InvalidOperands(Loc, LHSStripped, RHSStripped);
return QualType();
}
// FIXME: Consider combining this with checkEnumArithmeticConversions.
int NumEnumArgs = (int)LHSStrippedType->isEnumeralType() +
RHSStrippedType->isEnumeralType();
if (NumEnumArgs == 1) {
bool LHSIsEnum = LHSStrippedType->isEnumeralType();
QualType OtherTy = LHSIsEnum ? RHSStrippedType : LHSStrippedType;
if (OtherTy->hasFloatingRepresentation()) {
S.InvalidOperands(Loc, LHSStripped, RHSStripped);
return QualType();
}
}
if (NumEnumArgs == 2) {
// C++2a [expr.spaceship]p5: If both operands have the same enumeration
// type E, the operator yields the result of converting the operands
// to the underlying type of E and applying <=> to the converted operands.
if (!S.Context.hasSameUnqualifiedType(LHSStrippedType, RHSStrippedType)) {
S.InvalidOperands(Loc, LHS, RHS);
return QualType();
}
QualType IntType =
LHSStrippedType->castAs<EnumType>()->getDecl()->getIntegerType();
assert(IntType->isArithmeticType());
// We can't use `CK_IntegralCast` when the underlying type is 'bool', so we
// promote the boolean type, and all other promotable integer types, to
// avoid this.
if (S.Context.isPromotableIntegerType(IntType))
IntType = S.Context.getPromotedIntegerType(IntType);
LHS = S.ImpCastExprToType(LHS.get(), IntType, CK_IntegralCast);
RHS = S.ImpCastExprToType(RHS.get(), IntType, CK_IntegralCast);
LHSType = RHSType = IntType;
}
// C++2a [expr.spaceship]p4: If both operands have arithmetic types, the
// usual arithmetic conversions are applied to the operands.
QualType Type =
S.UsualArithmeticConversions(LHS, RHS, Loc, Sema::ACK_Comparison);
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
if (Type.isNull())
return S.InvalidOperands(Loc, LHS, RHS);
std::optional<ComparisonCategoryType> CCT =
getComparisonCategoryForBuiltinCmp(Type);
if (!CCT)
return S.InvalidOperands(Loc, LHS, RHS);
bool HasNarrowing = checkThreeWayNarrowingConversion(
S, Type, LHS.get(), LHSType, LHS.get()->getBeginLoc());
HasNarrowing |= checkThreeWayNarrowingConversion(S, Type, RHS.get(), RHSType,
RHS.get()->getBeginLoc());
if (HasNarrowing)
return QualType();
assert(!Type.isNull() && "composite type for <=> has not been set");
return S.CheckComparisonCategoryType(
*CCT, Loc, Sema::ComparisonCategoryUsage::OperatorInExpression);
}
static QualType checkArithmeticOrEnumeralCompare(Sema &S, ExprResult &LHS,
ExprResult &RHS,
SourceLocation Loc,
BinaryOperatorKind Opc) {
if (Opc == BO_Cmp)
return checkArithmeticOrEnumeralThreeWayCompare(S, LHS, RHS, Loc);
// C99 6.5.8p3 / C99 6.5.9p4
QualType Type =
S.UsualArithmeticConversions(LHS, RHS, Loc, Sema::ACK_Comparison);
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
if (Type.isNull())
return S.InvalidOperands(Loc, LHS, RHS);
assert(Type->isArithmeticType() || Type->isEnumeralType());
if (Type->isAnyComplexType() && BinaryOperator::isRelationalOp(Opc))
return S.InvalidOperands(Loc, LHS, RHS);
// Check for comparisons of floating point operands using != and ==.
if (Type->hasFloatingRepresentation())
S.CheckFloatComparison(Loc, LHS.get(), RHS.get(), Opc);
// The result of comparisons is 'bool' in C++, 'int' in C.
return S.Context.getLogicalOperationType();
}
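// Diagnose equality comparisons of a pointer against a null character
// constant such as `p == '\0'`, suggesting an explicit null pointer constant
// (NULL or `(void *)0`) instead.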
void Sema::CheckPtrComparisonWithNullChar(ExprResult &E, ExprResult &NullE) {
if (!NullE.get()->getType()->isAnyPointerType())
return;
int NullValue = PP.isMacroDefined("NULL") ? 0 : 1;
if (!E.get()->getType()->isAnyPointerType() &&
E.get()->isNullPointerConstant(Context,
Expr::NPC_ValueDependentIsNotNull) ==
Expr::NPCK_ZeroExpression) {
if (const auto *CL = dyn_cast<CharacterLiteral>(E.get())) {
if (CL->getValue() == 0)
Diag(E.get()->getExprLoc(), diag::warn_pointer_compare)
<< NullValue
<< FixItHint::CreateReplacement(E.get()->getExprLoc(),
NullValue ? "NULL" : "(void *)0");
} else if (const auto *CE = dyn_cast<CStyleCastExpr>(E.get())) {
TypeSourceInfo *TI = CE->getTypeInfoAsWritten();
QualType T = Context.getCanonicalType(TI->getType()).getUnqualifiedType();
if (T == Context.CharTy)
Diag(E.get()->getExprLoc(), diag::warn_pointer_compare)
<< NullValue
<< FixItHint::CreateReplacement(E.get()->getExprLoc(),
NullValue ? "NULL" : "(void *)0");
}
}
}
// C99 6.5.8, C++ [expr.rel]
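// Type-check the operands of a comparison operator (==, !=, <, >, <=, >=,
// <=>), returning the result type, or a null QualType on error.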
QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc,
BinaryOperatorKind Opc) {
bool IsRelational = BinaryOperator::isRelationalOp(Opc);
bool IsThreeWay = Opc == BO_Cmp;
bool IsOrdered = IsRelational || IsThreeWay;
auto IsAnyPointerType = [](ExprResult E) {
QualType Ty = E.get()->getType();
return Ty->isPointerType() || Ty->isMemberPointerType();
};
// C++2a [expr.spaceship]p6: If at least one of the operands is of pointer
// type, array-to-pointer, ..., conversions are performed on both operands to
// bring them to their composite type.
// Otherwise, all comparisons expect an rvalue, so convert to rvalue before
// any type-related checks.
if (!IsThreeWay || IsAnyPointerType(LHS) || IsAnyPointerType(RHS)) {
LHS = DefaultFunctionArrayLvalueConversion(LHS.get());
if (LHS.isInvalid())
return QualType();
RHS = DefaultFunctionArrayLvalueConversion(RHS.get());
if (RHS.isInvalid())
return QualType();
} else {
LHS = DefaultLvalueConversion(LHS.get());
if (LHS.isInvalid())
return QualType();
RHS = DefaultLvalueConversion(RHS.get());
if (RHS.isInvalid())
return QualType();
}
checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/true);
if (!getLangOpts().CPlusPlus && BinaryOperator::isEqualityOp(Opc)) {
CheckPtrComparisonWithNullChar(LHS, RHS);
CheckPtrComparisonWithNullChar(RHS, LHS);
}
// Handle vector comparisons separately.
if (LHS.get()->getType()->isVectorType() ||
RHS.get()->getType()->isVectorType())
return CheckVectorCompareOperands(LHS, RHS, Loc, Opc);
if (LHS.get()->getType()->isSveVLSBuiltinType() ||
RHS.get()->getType()->isSveVLSBuiltinType())
return CheckSizelessVectorCompareOperands(LHS, RHS, Loc, Opc);
diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc);
diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc);
QualType LHSType = LHS.get()->getType();
QualType RHSType = RHS.get()->getType();
if ((LHSType->isArithmeticType() || LHSType->isEnumeralType()) &&
(RHSType->isArithmeticType() || RHSType->isEnumeralType()))
return checkArithmeticOrEnumeralCompare(*this, LHS, RHS, Loc, Opc);
if ((LHSType->isPointerType() &&
LHSType->getPointeeType().isWebAssemblyReferenceType()) ||
(RHSType->isPointerType() &&
RHSType->getPointeeType().isWebAssemblyReferenceType()))
return InvalidOperands(Loc, LHS, RHS);
const Expr::NullPointerConstantKind LHSNullKind =
LHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull);
const Expr::NullPointerConstantKind RHSNullKind =
RHS.get()->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull);
bool LHSIsNull = LHSNullKind != Expr::NPCK_NotNull;
bool RHSIsNull = RHSNullKind != Expr::NPCK_NotNull;
auto computeResultTy = [&]() {
if (Opc != BO_Cmp)
return Context.getLogicalOperationType();
assert(getLangOpts().CPlusPlus);
assert(Context.hasSameType(LHS.get()->getType(), RHS.get()->getType()));
QualType CompositeTy = LHS.get()->getType();
assert(!CompositeTy->isReferenceType());
std::optional<ComparisonCategoryType> CCT =
getComparisonCategoryForBuiltinCmp(CompositeTy);
if (!CCT)
return InvalidOperands(Loc, LHS, RHS);
if (CompositeTy->isPointerType() && LHSIsNull != RHSIsNull) {
// P0946R0: Comparisons between a null pointer constant and an object
// pointer result in std::strong_equality, which is ill-formed under
// P1959R0.
Diag(Loc, diag::err_typecheck_three_way_comparison_of_pointer_and_zero)
<< (LHSIsNull ? LHS.get()->getSourceRange()
: RHS.get()->getSourceRange());
return QualType();
}
return CheckComparisonCategoryType(
*CCT, Loc, ComparisonCategoryUsage::OperatorInExpression);
};
if (!IsOrdered && LHSIsNull != RHSIsNull) {
bool IsEquality = Opc == BO_EQ;
if (RHSIsNull)
DiagnoseAlwaysNonNullPointer(LHS.get(), RHSNullKind, IsEquality,
RHS.get()->getSourceRange());
else
DiagnoseAlwaysNonNullPointer(RHS.get(), LHSNullKind, IsEquality,
LHS.get()->getSourceRange());
}
if (IsOrdered && LHSType->isFunctionPointerType() &&
RHSType->isFunctionPointerType()) {
// Valid unless it is a relational comparison of function pointers.
bool IsError = Opc == BO_Cmp;
auto DiagID =
IsError ? diag::err_typecheck_ordered_comparison_of_function_pointers
: getLangOpts().CPlusPlus
? diag::warn_typecheck_ordered_comparison_of_function_pointers
: diag::ext_typecheck_ordered_comparison_of_function_pointers;
Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
if (IsError)
return QualType();
}
if ((LHSType->isIntegerType() && !LHSIsNull) ||
(RHSType->isIntegerType() && !RHSIsNull)) {
// Skip normal pointer conversion checks in this case; we have better
// diagnostics for this below.
} else if (getLangOpts().CPlusPlus) {
// Equality comparison of a function pointer to a void pointer is invalid,
// but we allow it as an extension.
// FIXME: If we really want to allow this, should it be part of composite
// pointer type computation so it works in conditionals too?
if (!IsOrdered &&
((LHSType->isFunctionPointerType() && RHSType->isVoidPointerType()) ||
(RHSType->isFunctionPointerType() && LHSType->isVoidPointerType()))) {
// This is a gcc extension compatibility comparison.
// In a SFINAE context, we treat this as a hard error to maintain
// conformance with the C++ standard.
diagnoseFunctionPointerToVoidComparison(
*this, Loc, LHS, RHS, /*isError*/ (bool)isSFINAEContext());
if (isSFINAEContext())
return QualType();
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast);
return computeResultTy();
}
// C++ [expr.eq]p2:
// If at least one operand is a pointer [...] bring them to their
// composite pointer type.
// C++ [expr.spaceship]p6
// If at least one of the operands is of pointer type, [...] bring them
// to their composite pointer type.
// C++ [expr.rel]p2:
// If both operands are pointers, [...] bring them to their composite
// pointer type.
// For <=>, the only valid non-pointer types are arrays and functions, and
// we already decayed those, so this is really the same as the relational
// comparison rule.
if ((int)LHSType->isPointerType() + (int)RHSType->isPointerType() >=
(IsOrdered ? 2 : 1) &&
(!LangOpts.ObjCAutoRefCount || !(LHSType->isObjCObjectPointerType() ||
RHSType->isObjCObjectPointerType()))) {
if (convertPointersToCompositeType(*this, Loc, LHS, RHS))
return QualType();
return computeResultTy();
}
} else if (LHSType->isPointerType() &&
RHSType->isPointerType()) { // C99 6.5.8p2
// All of the following pointer-related warnings are GCC extensions, except
// when handling null pointer constants.
QualType LCanPointeeTy =
LHSType->castAs<PointerType>()->getPointeeType().getCanonicalType();
QualType RCanPointeeTy =
RHSType->castAs<PointerType>()->getPointeeType().getCanonicalType();
// C99 6.5.9p2 and C99 6.5.8p2
if (Context.typesAreCompatible(LCanPointeeTy.getUnqualifiedType(),
RCanPointeeTy.getUnqualifiedType())) {
if (IsRelational) {
// Pointers both need to point to complete or incomplete types
if ((LCanPointeeTy->isIncompleteType() !=
RCanPointeeTy->isIncompleteType()) &&
!getLangOpts().C11) {
Diag(Loc, diag::ext_typecheck_compare_complete_incomplete_pointers)
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange()
<< LHSType << RHSType << LCanPointeeTy->isIncompleteType()
<< RCanPointeeTy->isIncompleteType();
}
}
} else if (!IsRelational &&
(LCanPointeeTy->isVoidType() || RCanPointeeTy->isVoidType())) {
// Valid unless comparing a non-null pointer with a function pointer.
if ((LCanPointeeTy->isFunctionType() || RCanPointeeTy->isFunctionType())
&& !LHSIsNull && !RHSIsNull)
diagnoseFunctionPointerToVoidComparison(*this, Loc, LHS, RHS,
/*isError*/false);
} else {
// Invalid
diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false);
}
if (LCanPointeeTy != RCanPointeeTy) {
// Treat NULL constant as a special case in OpenCL.
if (getLangOpts().OpenCL && !LHSIsNull && !RHSIsNull) {
if (!LCanPointeeTy.isAddressSpaceOverlapping(RCanPointeeTy)) {
Diag(Loc,
diag::err_typecheck_op_on_nonoverlapping_address_space_pointers)
<< LHSType << RHSType << 0 /* comparison */
<< LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
}
}
LangAS AddrSpaceL = LCanPointeeTy.getAddressSpace();
LangAS AddrSpaceR = RCanPointeeTy.getAddressSpace();
CastKind Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion
: CK_BitCast;
if (LHSIsNull && !RHSIsNull)
LHS = ImpCastExprToType(LHS.get(), RHSType, Kind);
else
RHS = ImpCastExprToType(RHS.get(), LHSType, Kind);
}
return computeResultTy();
}
// C++ [expr.eq]p4:
// Two operands of type std::nullptr_t or one operand of type
// std::nullptr_t and the other a null pointer constant compare
// equal.
// C23 6.5.9p5:
// If both operands have type nullptr_t or one operand has type nullptr_t
// and the other is a null pointer constant, they compare equal if the
// former is a null pointer.
if (!IsOrdered && LHSIsNull && RHSIsNull) {
if (LHSType->isNullPtrType()) {
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer);
return computeResultTy();
}
if (RHSType->isNullPtrType()) {
LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer);
return computeResultTy();
}
}
if (!getLangOpts().CPlusPlus && !IsOrdered && (LHSIsNull || RHSIsNull)) {
// C23 6.5.9p6:
// Otherwise, at least one operand is a pointer. If one is a pointer and
// the other is a null pointer constant or has type nullptr_t, they
// compare equal.
if (LHSIsNull && RHSType->isPointerType()) {
LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer);
return computeResultTy();
}
if (RHSIsNull && LHSType->isPointerType()) {
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer);
return computeResultTy();
}
}
// Comparison of Objective-C pointers and block pointers against nullptr_t.
// These aren't covered by the composite pointer type rules.
if (!IsOrdered && RHSType->isNullPtrType() &&
(LHSType->isObjCObjectPointerType() || LHSType->isBlockPointerType())) {
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer);
return computeResultTy();
}
if (!IsOrdered && LHSType->isNullPtrType() &&
(RHSType->isObjCObjectPointerType() || RHSType->isBlockPointerType())) {
LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer);
return computeResultTy();
}
if (getLangOpts().CPlusPlus) {
if (IsRelational &&
((LHSType->isNullPtrType() && RHSType->isPointerType()) ||
(RHSType->isNullPtrType() && LHSType->isPointerType()))) {
// HACK: Relational comparison of nullptr_t against a pointer type is
// invalid per DR583, but we allow it within std::less<> and friends,
// since otherwise common uses of it break.
// FIXME: Consider removing this hack once LWG fixes std::less<> and
// friends to have std::nullptr_t overload candidates.
DeclContext *DC = CurContext;
if (isa<FunctionDecl>(DC))
DC = DC->getParent();
if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
if (CTSD->isInStdNamespace() &&
llvm::StringSwitch<bool>(CTSD->getName())
.Cases("less", "less_equal", "greater", "greater_equal", true)
.Default(false)) {
if (RHSType->isNullPtrType())
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer);
else
LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer);
return computeResultTy();
}
}
}
// C++ [expr.eq]p2:
// If at least one operand is a pointer to member, [...] bring them to
// their composite pointer type.
if (!IsOrdered &&
(LHSType->isMemberPointerType() || RHSType->isMemberPointerType())) {
if (convertPointersToCompositeType(*this, Loc, LHS, RHS))
return QualType();
else
return computeResultTy();
}
}
// Handle block pointer types.
if (!IsOrdered && LHSType->isBlockPointerType() &&
RHSType->isBlockPointerType()) {
QualType lpointee = LHSType->castAs<BlockPointerType>()->getPointeeType();
QualType rpointee = RHSType->castAs<BlockPointerType>()->getPointeeType();
if (!LHSIsNull && !RHSIsNull &&
!Context.typesAreCompatible(lpointee, rpointee)) {
Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks)
<< LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
}
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast);
return computeResultTy();
}
// Allow block pointers to be compared with null pointer constants.
if (!IsOrdered
&& ((LHSType->isBlockPointerType() && RHSType->isPointerType())
|| (LHSType->isPointerType() && RHSType->isBlockPointerType()))) {
if (!LHSIsNull && !RHSIsNull) {
if (!((RHSType->isPointerType() && RHSType->castAs<PointerType>()
->getPointeeType()->isVoidType())
|| (LHSType->isPointerType() && LHSType->castAs<PointerType>()
->getPointeeType()->isVoidType())))
Diag(Loc, diag::err_typecheck_comparison_of_distinct_blocks)
<< LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
}
if (LHSIsNull && !RHSIsNull)
LHS = ImpCastExprToType(LHS.get(), RHSType,
RHSType->isPointerType() ? CK_BitCast
: CK_AnyPointerToBlockPointerCast);
else
RHS = ImpCastExprToType(RHS.get(), LHSType,
LHSType->isPointerType() ? CK_BitCast
: CK_AnyPointerToBlockPointerCast);
return computeResultTy();
}
if (LHSType->isObjCObjectPointerType() ||
RHSType->isObjCObjectPointerType()) {
const PointerType *LPT = LHSType->getAs<PointerType>();
const PointerType *RPT = RHSType->getAs<PointerType>();
if (LPT || RPT) {
bool LPtrToVoid = LPT ? LPT->getPointeeType()->isVoidType() : false;
bool RPtrToVoid = RPT ? RPT->getPointeeType()->isVoidType() : false;
if (!LPtrToVoid && !RPtrToVoid &&
!Context.typesAreCompatible(LHSType, RHSType)) {
diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS,
/*isError*/false);
}
// FIXME: If LPtrToVoid, we should presumably convert the LHS rather than
// the RHS, but we have test coverage for this behavior.
// FIXME: Consider using convertPointersToCompositeType in C++.
if (LHSIsNull && !RHSIsNull) {
Expr *E = LHS.get();
if (getLangOpts().ObjCAutoRefCount)
ObjC().CheckObjCConversion(SourceRange(), RHSType, E,
CheckedConversionKind::Implicit);
LHS = ImpCastExprToType(E, RHSType,
RPT ? CK_BitCast :CK_CPointerToObjCPointerCast);
}
else {
Expr *E = RHS.get();
if (getLangOpts().ObjCAutoRefCount)
ObjC().CheckObjCConversion(SourceRange(), LHSType, E,
CheckedConversionKind::Implicit,
/*Diagnose=*/true,
/*DiagnoseCFAudited=*/false, Opc);
RHS = ImpCastExprToType(E, LHSType,
LPT ? CK_BitCast :CK_CPointerToObjCPointerCast);
}
return computeResultTy();
}
if (LHSType->isObjCObjectPointerType() &&
RHSType->isObjCObjectPointerType()) {
if (!Context.areComparableObjCPointerTypes(LHSType, RHSType))
diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS,
/*isError*/false);
if (isObjCObjectLiteral(LHS) || isObjCObjectLiteral(RHS))
diagnoseObjCLiteralComparison(*this, Loc, LHS, RHS, Opc);
if (LHSIsNull && !RHSIsNull)
LHS = ImpCastExprToType(LHS.get(), RHSType, CK_BitCast);
else
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_BitCast);
return computeResultTy();
}
if (!IsOrdered && LHSType->isBlockPointerType() &&
RHSType->isBlockCompatibleObjCPointerType(Context)) {
LHS = ImpCastExprToType(LHS.get(), RHSType,
CK_BlockPointerToObjCPointerCast);
return computeResultTy();
} else if (!IsOrdered &&
LHSType->isBlockCompatibleObjCPointerType(Context) &&
RHSType->isBlockPointerType()) {
RHS = ImpCastExprToType(RHS.get(), LHSType,
CK_BlockPointerToObjCPointerCast);
return computeResultTy();
}
}
if ((LHSType->isAnyPointerType() && RHSType->isIntegerType()) ||
(LHSType->isIntegerType() && RHSType->isAnyPointerType())) {
unsigned DiagID = 0;
bool isError = false;
if (LangOpts.DebuggerSupport) {
// Under a debugger, allow the comparison of pointers to integers,
// since users tend to want to compare addresses.
} else if ((LHSIsNull && LHSType->isIntegerType()) ||
(RHSIsNull && RHSType->isIntegerType())) {
if (IsOrdered) {
isError = getLangOpts().CPlusPlus;
DiagID =
isError ? diag::err_typecheck_ordered_comparison_of_pointer_and_zero
: diag::ext_typecheck_ordered_comparison_of_pointer_and_zero;
}
} else if (getLangOpts().CPlusPlus) {
DiagID = diag::err_typecheck_comparison_of_pointer_integer;
isError = true;
} else if (IsOrdered)
DiagID = diag::ext_typecheck_ordered_comparison_of_pointer_integer;
else
DiagID = diag::ext_typecheck_comparison_of_pointer_integer;
if (DiagID) {
Diag(Loc, DiagID)
<< LHSType << RHSType << LHS.get()->getSourceRange()
<< RHS.get()->getSourceRange();
if (isError)
return QualType();
}
if (LHSType->isIntegerType())
LHS = ImpCastExprToType(LHS.get(), RHSType,
LHSIsNull ? CK_NullToPointer : CK_IntegralToPointer);
else
RHS = ImpCastExprToType(RHS.get(), LHSType,
RHSIsNull ? CK_NullToPointer : CK_IntegralToPointer);
return computeResultTy();
}
// Handle block pointers.
if (!IsOrdered && RHSIsNull
&& LHSType->isBlockPointerType() && RHSType->isIntegerType()) {
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer);
return computeResultTy();
}
if (!IsOrdered && LHSIsNull
&& LHSType->isIntegerType() && RHSType->isBlockPointerType()) {
LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer);
return computeResultTy();
}
if (getLangOpts().getOpenCLCompatibleVersion() >= 200) {
if (LHSType->isClkEventT() && RHSType->isClkEventT()) {
return computeResultTy();
}
if (LHSType->isQueueT() && RHSType->isQueueT()) {
return computeResultTy();
}
if (LHSIsNull && RHSType->isQueueT()) {
LHS = ImpCastExprToType(LHS.get(), RHSType, CK_NullToPointer);
return computeResultTy();
}
if (LHSType->isQueueT() && RHSIsNull) {
RHS = ImpCastExprToType(RHS.get(), LHSType, CK_NullToPointer);
return computeResultTy();
}
}
return InvalidOperands(Loc, LHS, RHS);
}
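// Vector comparisons yield a signed integer vector whose elements match the
// width of the operands' elements (all bits set for true, zero for false);
// this helper computes that type.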
QualType Sema::GetSignedVectorType(QualType V) {
const VectorType *VTy = V->castAs<VectorType>();
unsigned TypeSize = Context.getTypeSize(VTy->getElementType());
if (isa<ExtVectorType>(VTy)) {
if (VTy->isExtVectorBoolType())
return Context.getExtVectorType(Context.BoolTy, VTy->getNumElements());
if (TypeSize == Context.getTypeSize(Context.CharTy))
return Context.getExtVectorType(Context.CharTy, VTy->getNumElements());
if (TypeSize == Context.getTypeSize(Context.ShortTy))
return Context.getExtVectorType(Context.ShortTy, VTy->getNumElements());
if (TypeSize == Context.getTypeSize(Context.IntTy))
return Context.getExtVectorType(Context.IntTy, VTy->getNumElements());
if (TypeSize == Context.getTypeSize(Context.Int128Ty))
return Context.getExtVectorType(Context.Int128Ty, VTy->getNumElements());
if (TypeSize == Context.getTypeSize(Context.LongTy))
return Context.getExtVectorType(Context.LongTy, VTy->getNumElements());
assert(TypeSize == Context.getTypeSize(Context.LongLongTy) &&
"Unhandled vector element size in vector compare");
return Context.getExtVectorType(Context.LongLongTy, VTy->getNumElements());
}
if (TypeSize == Context.getTypeSize(Context.Int128Ty))
return Context.getVectorType(Context.Int128Ty, VTy->getNumElements(),
VectorKind::Generic);
if (TypeSize == Context.getTypeSize(Context.LongLongTy))
return Context.getVectorType(Context.LongLongTy, VTy->getNumElements(),
VectorKind::Generic);
if (TypeSize == Context.getTypeSize(Context.LongTy))
return Context.getVectorType(Context.LongTy, VTy->getNumElements(),
VectorKind::Generic);
if (TypeSize == Context.getTypeSize(Context.IntTy))
return Context.getVectorType(Context.IntTy, VTy->getNumElements(),
VectorKind::Generic);
if (TypeSize == Context.getTypeSize(Context.ShortTy))
return Context.getVectorType(Context.ShortTy, VTy->getNumElements(),
VectorKind::Generic);
assert(TypeSize == Context.getTypeSize(Context.CharTy) &&
"Unhandled vector element size in vector compare");
return Context.getVectorType(Context.CharTy, VTy->getNumElements(),
VectorKind::Generic);
}
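// Sizeless (SVE) counterpart of GetSignedVectorType: build a scalable vector
// of signed integers with the same element width and element count as V.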
QualType Sema::GetSignedSizelessVectorType(QualType V) {
const BuiltinType *VTy = V->castAs<BuiltinType>();
assert(VTy->isSizelessBuiltinType() && "expected sizeless type");
const QualType ETy = V->getSveEltType(Context);
const auto TypeSize = Context.getTypeSize(ETy);
const QualType IntTy = Context.getIntTypeForBitwidth(TypeSize, true);
const llvm::ElementCount VecSize = Context.getBuiltinVectorTypeInfo(VTy).EC;
return Context.getScalableVectorType(IntTy, VecSize.getKnownMinValue());
}
QualType Sema::CheckVectorCompareOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc,
BinaryOperatorKind Opc) {
if (Opc == BO_Cmp) {
Diag(Loc, diag::err_three_way_vector_comparison);
return QualType();
}
// Check to make sure we're operating on vectors of the same type and width,
// allowing one side to be a scalar of the element type.
QualType vType =
CheckVectorOperands(LHS, RHS, Loc, /*isCompAssign*/ false,
/*AllowBothBool*/ true,
/*AllowBoolConversions*/ getLangOpts().ZVector,
/*AllowBooleanOperation*/ true,
/*ReportInvalid*/ true);
if (vType.isNull())
return vType;
QualType LHSType = LHS.get()->getType();
// Determine the return type of a vector compare. By default clang will return
// a scalar for all vector compares except vector bool and vector pixel.
// With the gcc compiler we will always return a vector type and with the xl
// compiler we will always return a scalar type. This switch allows choosing
// which behavior is preferred.
if (getLangOpts().AltiVec) {
switch (getLangOpts().getAltivecSrcCompat()) {
case LangOptions::AltivecSrcCompatKind::Mixed:
// If AltiVec, the comparison results in a numeric type, i.e.
// bool for C++, int for C
if (vType->castAs<VectorType>()->getVectorKind() ==
VectorKind::AltiVecVector)
return Context.getLogicalOperationType();
else
Diag(Loc, diag::warn_deprecated_altivec_src_compat);
break;
case LangOptions::AltivecSrcCompatKind::GCC:
// For GCC we always return the vector type.
break;
case LangOptions::AltivecSrcCompatKind::XL:
return Context.getLogicalOperationType();
break;
}
}
// For non-floating point types, check for self-comparisons of the form
// x == x, x != x, x < x, etc. These always evaluate to a constant, and
// often indicate logic errors in the program.
diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc);
// Check for comparisons of floating point operands using != and ==.
if (LHSType->hasFloatingRepresentation()) {
assert(RHS.get()->getType()->hasFloatingRepresentation());
CheckFloatComparison(Loc, LHS.get(), RHS.get(), Opc);
}
// Return a signed type for the vector.
return GetSignedVectorType(vType);
}
QualType Sema::CheckSizelessVectorCompareOperands(ExprResult &LHS,
ExprResult &RHS,
SourceLocation Loc,
BinaryOperatorKind Opc) {
if (Opc == BO_Cmp) {
Diag(Loc, diag::err_three_way_vector_comparison);
return QualType();
}
// Check to make sure we're operating on vectors of the same type and width,
// allowing one side to be a scalar of the element type.
QualType vType = CheckSizelessVectorOperands(
LHS, RHS, Loc, /*isCompAssign*/ false, ACK_Comparison);
if (vType.isNull())
return vType;
QualType LHSType = LHS.get()->getType();
// For non-floating point types, check for self-comparisons of the form
// x == x, x != x, x < x, etc. These always evaluate to a constant, and
// often indicate logic errors in the program.
diagnoseTautologicalComparison(*this, Loc, LHS.get(), RHS.get(), Opc);
// Check for comparisons of floating point operands using != and ==.
if (LHSType->hasFloatingRepresentation()) {
assert(RHS.get()->getType()->hasFloatingRepresentation());
CheckFloatComparison(Loc, LHS.get(), RHS.get(), Opc);
}
const BuiltinType *LHSBuiltinTy = LHSType->getAs<BuiltinType>();
const BuiltinType *RHSBuiltinTy = RHS.get()->getType()->getAs<BuiltinType>();
if (LHSBuiltinTy && RHSBuiltinTy && LHSBuiltinTy->isSVEBool() &&
RHSBuiltinTy->isSVEBool())
return LHSType;
// Return a signed type for the vector.
return GetSignedSizelessVectorType(vType);
}
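// Diagnose uses of `^` with a literal 2 or 10 base where exponentiation was
// probably intended: e.g. `2 ^ 8` evaluates to 10 and gets a `1 << 8` fix-it,
// and `10 ^ 2` evaluates to 8 and gets a `1e2` fix-it.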
static void diagnoseXorMisusedAsPow(Sema &S, const ExprResult &XorLHS,
const ExprResult &XorRHS,
const SourceLocation Loc) {
// Do not diagnose macros.
if (Loc.isMacroID())
return;
// Do not diagnose if both LHS and RHS are macros.
if (XorLHS.get()->getExprLoc().isMacroID() &&
XorRHS.get()->getExprLoc().isMacroID())
return;
bool Negative = false;
bool ExplicitPlus = false;
const auto *LHSInt = dyn_cast<IntegerLiteral>(XorLHS.get());
const auto *RHSInt = dyn_cast<IntegerLiteral>(XorRHS.get());
if (!LHSInt)
return;
if (!RHSInt) {
// Check negative literals.
if (const auto *UO = dyn_cast<UnaryOperator>(XorRHS.get())) {
UnaryOperatorKind Opc = UO->getOpcode();
if (Opc != UO_Minus && Opc != UO_Plus)
return;
RHSInt = dyn_cast<IntegerLiteral>(UO->getSubExpr());
if (!RHSInt)
return;
Negative = (Opc == UO_Minus);
ExplicitPlus = !Negative;
} else {
return;
}
}
const llvm::APInt &LeftSideValue = LHSInt->getValue();
llvm::APInt RightSideValue = RHSInt->getValue();
if (LeftSideValue != 2 && LeftSideValue != 10)
return;
if (LeftSideValue.getBitWidth() != RightSideValue.getBitWidth())
return;
CharSourceRange ExprRange = CharSourceRange::getCharRange(
LHSInt->getBeginLoc(), S.getLocForEndOfToken(RHSInt->getLocation()));
llvm::StringRef ExprStr =
Lexer::getSourceText(ExprRange, S.getSourceManager(), S.getLangOpts());
CharSourceRange XorRange =
CharSourceRange::getCharRange(Loc, S.getLocForEndOfToken(Loc));
llvm::StringRef XorStr =
Lexer::getSourceText(XorRange, S.getSourceManager(), S.getLangOpts());
// Do not diagnose if xor keyword/macro is used.
if (XorStr == "xor")
return;
std::string LHSStr = std::string(Lexer::getSourceText(
CharSourceRange::getTokenRange(LHSInt->getSourceRange()),
S.getSourceManager(), S.getLangOpts()));
std::string RHSStr = std::string(Lexer::getSourceText(
CharSourceRange::getTokenRange(RHSInt->getSourceRange()),
S.getSourceManager(), S.getLangOpts()));
if (Negative) {
RightSideValue = -RightSideValue;
RHSStr = "-" + RHSStr;
} else if (ExplicitPlus) {
RHSStr = "+" + RHSStr;
}
StringRef LHSStrRef = LHSStr;
StringRef RHSStrRef = RHSStr;
// Do not diagnose binary, hexadecimal, or octal literals, or literals with
// digit separators.
if (LHSStrRef.starts_with("0b") || LHSStrRef.starts_with("0B") ||
RHSStrRef.starts_with("0b") || RHSStrRef.starts_with("0B") ||
LHSStrRef.starts_with("0x") || LHSStrRef.starts_with("0X") ||
RHSStrRef.starts_with("0x") || RHSStrRef.starts_with("0X") ||
(LHSStrRef.size() > 1 && LHSStrRef.starts_with("0")) ||
(RHSStrRef.size() > 1 && RHSStrRef.starts_with("0")) ||
LHSStrRef.contains('\'') || RHSStrRef.contains('\''))
return;
bool SuggestXor =
S.getLangOpts().CPlusPlus || S.getPreprocessor().isMacroDefined("xor");
const llvm::APInt XorValue = LeftSideValue ^ RightSideValue;
int64_t RightSideIntValue = RightSideValue.getSExtValue();
if (LeftSideValue == 2 && RightSideIntValue >= 0) {
std::string SuggestedExpr = "1 << " + RHSStr;
bool Overflow = false;
llvm::APInt One = (LeftSideValue - 1);
llvm::APInt PowValue = One.sshl_ov(RightSideValue, Overflow);
if (Overflow) {
if (RightSideIntValue < 64)
S.Diag(Loc, diag::warn_xor_used_as_pow_base)
<< ExprStr << toString(XorValue, 10, true) << ("1LL << " + RHSStr)
<< FixItHint::CreateReplacement(ExprRange, "1LL << " + RHSStr);
else if (RightSideIntValue == 64)
S.Diag(Loc, diag::warn_xor_used_as_pow)
<< ExprStr << toString(XorValue, 10, true);
else
return;
} else {
S.Diag(Loc, diag::warn_xor_used_as_pow_base_extra)
<< ExprStr << toString(XorValue, 10, true) << SuggestedExpr
<< toString(PowValue, 10, true)
<< FixItHint::CreateReplacement(
ExprRange, (RightSideIntValue == 0) ? "1" : SuggestedExpr);
}
S.Diag(Loc, diag::note_xor_used_as_pow_silence)
<< ("0x2 ^ " + RHSStr) << SuggestXor;
} else if (LeftSideValue == 10) {
std::string SuggestedValue = "1e" + std::to_string(RightSideIntValue);
S.Diag(Loc, diag::warn_xor_used_as_pow_base)
<< ExprStr << toString(XorValue, 10, true) << SuggestedValue
<< FixItHint::CreateReplacement(ExprRange, SuggestedValue);
S.Diag(Loc, diag::note_xor_used_as_pow_silence)
<< ("0xA ^ " + RHSStr) << SuggestXor;
}
}
QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc) {
// Ensure that either both operands are of the same vector type, or
// one operand is of a vector type and the other is of its element type.
QualType vType = CheckVectorOperands(LHS, RHS, Loc, false,
/*AllowBothBool*/ true,
/*AllowBoolConversions*/ false,
/*AllowBooleanOperation*/ false,
/*ReportInvalid*/ false);
if (vType.isNull())
return InvalidOperands(Loc, LHS, RHS);
if (getLangOpts().OpenCL &&
getLangOpts().getOpenCLCompatibleVersion() < 120 &&
vType->hasFloatingRepresentation())
return InvalidOperands(Loc, LHS, RHS);
// FIXME: The check for C++ here is for GCC compatibility. GCC rejects the
// usage of the logical operators && and || with vectors in C. This
// check could be notionally dropped.
if (!getLangOpts().CPlusPlus &&
!(isa<ExtVectorType>(vType->getAs<VectorType>())))
return InvalidLogicalVectorOperands(Loc, LHS, RHS);
return GetSignedVectorType(LHS.get()->getType());
}
QualType Sema::CheckMatrixElementwiseOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc,
bool IsCompAssign) {
if (!IsCompAssign) {
LHS = DefaultFunctionArrayLvalueConversion(LHS.get());
if (LHS.isInvalid())
return QualType();
}
RHS = DefaultFunctionArrayLvalueConversion(RHS.get());
if (RHS.isInvalid())
return QualType();
// For conversion purposes, we ignore any qualifiers.
// For example, "const float" and "float" are equivalent.
QualType LHSType = LHS.get()->getType().getUnqualifiedType();
QualType RHSType = RHS.get()->getType().getUnqualifiedType();
const MatrixType *LHSMatType = LHSType->getAs<MatrixType>();
const MatrixType *RHSMatType = RHSType->getAs<MatrixType>();
assert((LHSMatType || RHSMatType) && "At least one operand must be a matrix");
if (Context.hasSameType(LHSType, RHSType))
return Context.getCommonSugaredType(LHSType, RHSType);
// Type conversion may change LHS/RHS. Keep copies of the original results, in
// case we have to return InvalidOperands.
ExprResult OriginalLHS = LHS;
ExprResult OriginalRHS = RHS;
if (LHSMatType && !RHSMatType) {
RHS = tryConvertExprToType(RHS.get(), LHSMatType->getElementType());
if (!RHS.isInvalid())
return LHSType;
return InvalidOperands(Loc, OriginalLHS, OriginalRHS);
}
if (!LHSMatType && RHSMatType) {
LHS = tryConvertExprToType(LHS.get(), RHSMatType->getElementType());
if (!LHS.isInvalid())
return RHSType;
return InvalidOperands(Loc, OriginalLHS, OriginalRHS);
}
return InvalidOperands(Loc, LHS, RHS);
}
QualType Sema::CheckMatrixMultiplyOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc,
bool IsCompAssign) {
if (!IsCompAssign) {
LHS = DefaultFunctionArrayLvalueConversion(LHS.get());
if (LHS.isInvalid())
return QualType();
}
RHS = DefaultFunctionArrayLvalueConversion(RHS.get());
if (RHS.isInvalid())
return QualType();
auto *LHSMatType = LHS.get()->getType()->getAs<ConstantMatrixType>();
auto *RHSMatType = RHS.get()->getType()->getAs<ConstantMatrixType>();
assert((LHSMatType || RHSMatType) && "At least one operand must be a matrix");
if (LHSMatType && RHSMatType) {
if (LHSMatType->getNumColumns() != RHSMatType->getNumRows())
return InvalidOperands(Loc, LHS, RHS);
if (Context.hasSameType(LHSMatType, RHSMatType))
return Context.getCommonSugaredType(
LHS.get()->getType().getUnqualifiedType(),
RHS.get()->getType().getUnqualifiedType());
QualType LHSELTy = LHSMatType->getElementType(),
RHSELTy = RHSMatType->getElementType();
if (!Context.hasSameType(LHSELTy, RHSELTy))
return InvalidOperands(Loc, LHS, RHS);
return Context.getConstantMatrixType(
Context.getCommonSugaredType(LHSELTy, RHSELTy),
LHSMatType->getNumRows(), RHSMatType->getNumColumns());
}
return CheckMatrixElementwiseOperands(LHS, RHS, Loc, IsCompAssign);
}
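// Boolean vector operands are legal only with the bitwise operators listed
// below; CheckBitwiseOperands forwards this as AllowBooleanOperation.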
static bool isLegalBoolVectorBinaryOp(BinaryOperatorKind Opc) {
switch (Opc) {
default:
return false;
case BO_And:
case BO_AndAssign:
case BO_Or:
case BO_OrAssign:
case BO_Xor:
case BO_XorAssign:
return true;
}
}
inline QualType Sema::CheckBitwiseOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc,
BinaryOperatorKind Opc) {
checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
bool IsCompAssign =
Opc == BO_AndAssign || Opc == BO_OrAssign || Opc == BO_XorAssign;
bool LegalBoolVecOperator = isLegalBoolVectorBinaryOp(Opc);
if (LHS.get()->getType()->isVectorType() ||
RHS.get()->getType()->isVectorType()) {
if (LHS.get()->getType()->hasIntegerRepresentation() &&
RHS.get()->getType()->hasIntegerRepresentation())
return CheckVectorOperands(LHS, RHS, Loc, IsCompAssign,
/*AllowBothBool*/ true,
/*AllowBoolConversions*/ getLangOpts().ZVector,
/*AllowBooleanOperation*/ LegalBoolVecOperator,
/*ReportInvalid*/ true);
return InvalidOperands(Loc, LHS, RHS);
}
if (LHS.get()->getType()->isSveVLSBuiltinType() ||
RHS.get()->getType()->isSveVLSBuiltinType()) {
if (LHS.get()->getType()->hasIntegerRepresentation() &&
RHS.get()->getType()->hasIntegerRepresentation())
return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign,
ACK_BitwiseOp);
return InvalidOperands(Loc, LHS, RHS);
}
if (Opc == BO_And)
diagnoseLogicalNotOnLHSofCheck(*this, LHS, RHS, Loc, Opc);
if (LHS.get()->getType()->hasFloatingRepresentation() ||
RHS.get()->getType()->hasFloatingRepresentation())
return InvalidOperands(Loc, LHS, RHS);
ExprResult LHSResult = LHS, RHSResult = RHS;
QualType compType = UsualArithmeticConversions(
LHSResult, RHSResult, Loc, IsCompAssign ? ACK_CompAssign : ACK_BitwiseOp);
if (LHSResult.isInvalid() || RHSResult.isInvalid())
return QualType();
LHS = LHSResult.get();
RHS = RHSResult.get();
if (Opc == BO_Xor)
diagnoseXorMisusedAsPow(*this, LHS, RHS, Loc);
if (!compType.isNull() && compType->isIntegralOrUnscopedEnumerationType())
return compType;
return InvalidOperands(Loc, LHS, RHS);
}
// C99 6.5.[13,14]
inline QualType Sema::CheckLogicalOperands(ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc,
BinaryOperatorKind Opc) {
// Check vector operands differently.
if (LHS.get()->getType()->isVectorType() ||
RHS.get()->getType()->isVectorType())
return CheckVectorLogicalOperands(LHS, RHS, Loc);
bool EnumConstantInBoolContext = false;
for (const ExprResult &HS : {LHS, RHS}) {
if (const auto *DREHS = dyn_cast<DeclRefExpr>(HS.get())) {
const auto *ECDHS = dyn_cast<EnumConstantDecl>(DREHS->getDecl());
if (ECDHS && ECDHS->getInitVal() != 0 && ECDHS->getInitVal() != 1)
EnumConstantInBoolContext = true;
}
}
if (EnumConstantInBoolContext)
Diag(Loc, diag::warn_enum_constant_in_bool_context);
// WebAssembly tables can't be used with logical operators.
QualType LHSTy = LHS.get()->getType();
QualType RHSTy = RHS.get()->getType();
const auto *LHSATy = dyn_cast<ArrayType>(LHSTy);
const auto *RHSATy = dyn_cast<ArrayType>(RHSTy);
if ((LHSATy && LHSATy->getElementType().isWebAssemblyReferenceType()) ||
(RHSATy && RHSATy->getElementType().isWebAssemblyReferenceType())) {
return InvalidOperands(Loc, LHS, RHS);
}
// Diagnose cases where the user writes a logical and/or but probably meant a
// bitwise one. We do this when the LHS is a non-bool integer and the RHS
// is a constant.
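// For example, `if (x && 0xFF)` was likely intended to be `if (x & 0xFF)`.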
if (!EnumConstantInBoolContext && LHS.get()->getType()->isIntegerType() &&
!LHS.get()->getType()->isBooleanType() &&
RHS.get()->getType()->isIntegerType() && !RHS.get()->isValueDependent() &&
// Don't warn in macros or template instantiations.
!Loc.isMacroID() && !inTemplateInstantiation()) {
// If the RHS can be constant folded, and if it constant folds to something
// that isn't 0 or 1 (which indicate a potential logical operation that
// happened to fold to true/false) then warn.
// Parens on the RHS are ignored.
Expr::EvalResult EVResult;
if (RHS.get()->EvaluateAsInt(EVResult, Context)) {
llvm::APSInt Result = EVResult.Val.getInt();
if ((getLangOpts().CPlusPlus && !RHS.get()->getType()->isBooleanType() &&
!RHS.get()->getExprLoc().isMacroID()) ||
(Result != 0 && Result != 1)) {
Diag(Loc, diag::warn_logical_instead_of_bitwise)
<< RHS.get()->getSourceRange() << (Opc == BO_LAnd ? "&&" : "||");
// Suggest replacing the logical operator with the bitwise version
Diag(Loc, diag::note_logical_instead_of_bitwise_change_operator)
<< (Opc == BO_LAnd ? "&" : "|")
<< FixItHint::CreateReplacement(
SourceRange(Loc, getLocForEndOfToken(Loc)),
Opc == BO_LAnd ? "&" : "|");
if (Opc == BO_LAnd)
// Suggest replacing "Foo() && kNonZero" with "Foo()"
Diag(Loc, diag::note_logical_instead_of_bitwise_remove_constant)
<< FixItHint::CreateRemoval(
SourceRange(getLocForEndOfToken(LHS.get()->getEndLoc()),
RHS.get()->getEndLoc()));
}
}
}
if (!Context.getLangOpts().CPlusPlus) {
// OpenCL v1.1 s6.3.g: The logical operators and (&&), or (||) do
// not operate on the built-in scalar and vector float types.
if (Context.getLangOpts().OpenCL &&
Context.getLangOpts().OpenCLVersion < 120) {
if (LHS.get()->getType()->isFloatingType() ||
RHS.get()->getType()->isFloatingType())
return InvalidOperands(Loc, LHS, RHS);
}
LHS = UsualUnaryConversions(LHS.get());
if (LHS.isInvalid())
return QualType();
RHS = UsualUnaryConversions(RHS.get());
if (RHS.isInvalid())
return QualType();
if (!LHS.get()->getType()->isScalarType() ||
!RHS.get()->getType()->isScalarType())
return InvalidOperands(Loc, LHS, RHS);
return Context.IntTy;
}
// The following is safe because we only use this method for
// non-overloadable operands.
// C++ [expr.log.and]p1
// C++ [expr.log.or]p1
// The operands are both contextually converted to type bool.
ExprResult LHSRes = PerformContextuallyConvertToBool(LHS.get());
if (LHSRes.isInvalid())
return InvalidOperands(Loc, LHS, RHS);
LHS = LHSRes;
ExprResult RHSRes = PerformContextuallyConvertToBool(RHS.get());
if (RHSRes.isInvalid())
return InvalidOperands(Loc, LHS, RHS);
RHS = RHSRes;
// C++ [expr.log.and]p2
// C++ [expr.log.or]p2
// The result is a bool.
return Context.BoolTy;
}
static bool IsReadonlyMessage(Expr *E, Sema &S) {
const MemberExpr *ME = dyn_cast<MemberExpr>(E);
if (!ME) return false;
if (!isa<FieldDecl>(ME->getMemberDecl())) return false;
ObjCMessageExpr *Base = dyn_cast<ObjCMessageExpr>(
ME->getBase()->IgnoreImplicit()->IgnoreParenImpCasts());
if (!Base) return false;
return Base->getMethodDecl() != nullptr;
}
/// Is the given expression (which must be 'const') a reference to a
/// variable which was originally non-const, but which has become
/// 'const' due to being captured within a block?
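/// For example, given `int i = 0;` and a block `^{ i = 1; }`, the captured
/// `i` is const inside the block unless `i` was declared with `__block`.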
enum NonConstCaptureKind { NCCK_None, NCCK_Block, NCCK_Lambda };
static NonConstCaptureKind isReferenceToNonConstCapture(Sema &S, Expr *E) {
assert(E->isLValue() && E->getType().isConstQualified());
E = E->IgnoreParens();
// Must be a reference to a declaration from an enclosing scope.
DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E);
if (!DRE) return NCCK_None;
if (!DRE->refersToEnclosingVariableOrCapture()) return NCCK_None;
// The declaration must be a variable which is not declared 'const'.
VarDecl *var = dyn_cast<VarDecl>(DRE->getDecl());
if (!var) return NCCK_None;
if (var->getType().isConstQualified()) return NCCK_None;
assert(var->hasLocalStorage() && "capture added 'const' to non-local?");
// Decide whether the first capture was for a block or a lambda.
DeclContext *DC = S.CurContext, *Prev = nullptr;
while (DC) {
// For init-capture, it is possible that the variable belongs to the
// template pattern of the current context.
if (auto *FD = dyn_cast<FunctionDecl>(DC))
if (var->isInitCapture() &&
FD->getTemplateInstantiationPattern() == var->getDeclContext())
break;
if (DC == var->getDeclContext())
break;
Prev = DC;
DC = DC->getParent();
}
// Unless we have an init-capture, we've gone one step too far.
if (!var->isInitCapture())
DC = Prev;
return (isa<BlockDecl>(DC) ? NCCK_Block : NCCK_Lambda);
}
static bool IsTypeModifiable(QualType Ty, bool IsDereference) {
Ty = Ty.getNonReferenceType();
if (IsDereference && Ty->isPointerType())
Ty = Ty->getPointeeType();
return !Ty.isConstQualified();
}
// Update err_typecheck_assign_const and note_typecheck_assign_const
// when this enum is changed.
enum {
ConstFunction,
ConstVariable,
ConstMember,
ConstMethod,
NestedConstMember,
ConstUnknown, // Keep as last element
};
/// Emit the "read-only variable not assignable" error and print notes to give
/// more information about why the variable is not assignable, such as pointing
/// to the declaration of a const variable, showing that a method is const, or
/// that the function is returning a const reference.
static void DiagnoseConstAssignment(Sema &S, const Expr *E,
SourceLocation Loc) {
SourceRange ExprRange = E->getSourceRange();
// Only emit one error, for the first const found. All other consts emit
// notes attached to that error.
bool DiagnosticEmitted = false;
// Track if the current expression is the result of a dereference, and if the
// next checked expression is the result of a dereference.
bool IsDereference = false;
bool NextIsDereference = false;
// Loop to process MemberExpr chains.
while (true) {
IsDereference = NextIsDereference;
E = E->IgnoreImplicit()->IgnoreParenImpCasts();
if (const MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
NextIsDereference = ME->isArrow();
const ValueDecl *VD = ME->getMemberDecl();
if (const FieldDecl *Field = dyn_cast<FieldDecl>(VD)) {
// Mutable fields can be modified even if the class is const.
if (Field->isMutable()) {
assert(DiagnosticEmitted && "Expected diagnostic not emitted.");
break;
}
if (!IsTypeModifiable(Field->getType(), IsDereference)) {
if (!DiagnosticEmitted) {
S.Diag(Loc, diag::err_typecheck_assign_const)
<< ExprRange << ConstMember << false /*static*/ << Field
<< Field->getType();
DiagnosticEmitted = true;
}
S.Diag(VD->getLocation(), diag::note_typecheck_assign_const)
<< ConstMember << false /*static*/ << Field << Field->getType()
<< Field->getSourceRange();
}
E = ME->getBase();
continue;
} else if (const VarDecl *VDecl = dyn_cast<VarDecl>(VD)) {
if (VDecl->getType().isConstQualified()) {
if (!DiagnosticEmitted) {
S.Diag(Loc, diag::err_typecheck_assign_const)
<< ExprRange << ConstMember << true /*static*/ << VDecl
<< VDecl->getType();
DiagnosticEmitted = true;
}
S.Diag(VD->getLocation(), diag::note_typecheck_assign_const)
<< ConstMember << true /*static*/ << VDecl << VDecl->getType()
<< VDecl->getSourceRange();
}
// Static fields do not inherit constness from parents.
break;
}
break; // End MemberExpr
} else if (const ArraySubscriptExpr *ASE =
dyn_cast<ArraySubscriptExpr>(E)) {
E = ASE->getBase()->IgnoreParenImpCasts();
continue;
} else if (const ExtVectorElementExpr *EVE =
dyn_cast<ExtVectorElementExpr>(E)) {
E = EVE->getBase()->IgnoreParenImpCasts();
continue;
}
break;
}
if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
// Function calls
const FunctionDecl *FD = CE->getDirectCallee();
if (FD && !IsTypeModifiable(FD->getReturnType(), IsDereference)) {
if (!DiagnosticEmitted) {
S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange
<< ConstFunction << FD;
DiagnosticEmitted = true;
}
S.Diag(FD->getReturnTypeSourceRange().getBegin(),
diag::note_typecheck_assign_const)
<< ConstFunction << FD << FD->getReturnType()
<< FD->getReturnTypeSourceRange();
}
} else if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
// Point to variable declaration.
if (const ValueDecl *VD = DRE->getDecl()) {
if (!IsTypeModifiable(VD->getType(), IsDereference)) {
if (!DiagnosticEmitted) {
S.Diag(Loc, diag::err_typecheck_assign_const)
<< ExprRange << ConstVariable << VD << VD->getType();
DiagnosticEmitted = true;
}
S.Diag(VD->getLocation(), diag::note_typecheck_assign_const)
<< ConstVariable << VD << VD->getType() << VD->getSourceRange();
}
}
} else if (isa<CXXThisExpr>(E)) {
if (const DeclContext *DC = S.getFunctionLevelDeclContext()) {
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(DC)) {
if (MD->isConst()) {
if (!DiagnosticEmitted) {
S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange
<< ConstMethod << MD;
DiagnosticEmitted = true;
}
S.Diag(MD->getLocation(), diag::note_typecheck_assign_const)
<< ConstMethod << MD << MD->getSourceRange();
}
}
}
}
if (DiagnosticEmitted)
return;
// Can't determine a more specific message, so display the generic error.
S.Diag(Loc, diag::err_typecheck_assign_const) << ExprRange << ConstUnknown;
}
enum OriginalExprKind {
OEK_Variable,
OEK_Member,
OEK_LValue
};
static void DiagnoseRecursiveConstFields(Sema &S, const ValueDecl *VD,
const RecordType *Ty,
SourceLocation Loc, SourceRange Range,
OriginalExprKind OEK,
bool &DiagnosticEmitted) {
std::vector<const RecordType *> RecordTypeList;
RecordTypeList.push_back(Ty);
unsigned NextToCheckIndex = 0;
// We walk the record hierarchy breadth-first to ensure that we print
// diagnostics in field nesting order.
while (RecordTypeList.size() > NextToCheckIndex) {
bool IsNested = NextToCheckIndex > 0;
for (const FieldDecl *Field :
RecordTypeList[NextToCheckIndex]->getDecl()->fields()) {
// First, check every field for constness.
QualType FieldTy = Field->getType();
if (FieldTy.isConstQualified()) {
if (!DiagnosticEmitted) {
S.Diag(Loc, diag::err_typecheck_assign_const)
<< Range << NestedConstMember << OEK << VD
<< IsNested << Field;
DiagnosticEmitted = true;
}
S.Diag(Field->getLocation(), diag::note_typecheck_assign_const)
<< NestedConstMember << IsNested << Field
<< FieldTy << Field->getSourceRange();
}
// Then, if the field is itself of record type, append it to the list to be
// checked next in order.
FieldTy = FieldTy.getCanonicalType();
if (const auto *FieldRecTy = FieldTy->getAs<RecordType>()) {
if (!llvm::is_contained(RecordTypeList, FieldRecTy))
RecordTypeList.push_back(FieldRecTy);
}
}
++NextToCheckIndex;
}
}
/// Emit an error for the case where a record we are trying to assign to has a
/// const-qualified field somewhere in its hierarchy.
static void DiagnoseRecursiveConstFields(Sema &S, const Expr *E,
SourceLocation Loc) {
QualType Ty = E->getType();
assert(Ty->isRecordType() && "lvalue was not record?");
SourceRange Range = E->getSourceRange();
const RecordType *RTy = Ty.getCanonicalType()->getAs<RecordType>();
bool DiagEmitted = false;
if (const MemberExpr *ME = dyn_cast<MemberExpr>(E))
DiagnoseRecursiveConstFields(S, ME->getMemberDecl(), RTy, Loc,
Range, OEK_Member, DiagEmitted);
else if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E))
DiagnoseRecursiveConstFields(S, DRE->getDecl(), RTy, Loc,
Range, OEK_Variable, DiagEmitted);
else
DiagnoseRecursiveConstFields(S, nullptr, RTy, Loc,
Range, OEK_LValue, DiagEmitted);
if (!DiagEmitted)
DiagnoseConstAssignment(S, E, Loc);
}
/// CheckForModifiableLvalue - Verify that E is a modifiable lvalue. If it
/// is, return false; if not, emit an error and return true.
static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) {
assert(!E->hasPlaceholderType(BuiltinType::PseudoObject));
S.CheckShadowingDeclModification(E, Loc);
SourceLocation OrigLoc = Loc;
Expr::isModifiableLvalueResult IsLV = E->isModifiableLvalue(S.Context,
&Loc);
if (IsLV == Expr::MLV_ClassTemporary && IsReadonlyMessage(E, S))
IsLV = Expr::MLV_InvalidMessageExpression;
if (IsLV == Expr::MLV_Valid)
return false;
unsigned DiagID = 0;
bool NeedType = false;
switch (IsLV) { // C99 6.5.16p2
case Expr::MLV_ConstQualified:
// Use a specialized diagnostic when we're assigning to an object
// from an enclosing function or block.
if (NonConstCaptureKind NCCK = isReferenceToNonConstCapture(S, E)) {
if (NCCK == NCCK_Block)
DiagID = diag::err_block_decl_ref_not_modifiable_lvalue;
else
DiagID = diag::err_lambda_decl_ref_not_modifiable_lvalue;
break;
}
// In ARC, use some specialized diagnostics for occasions where we
// infer 'const'. These are always pseudo-strong variables.
if (S.getLangOpts().ObjCAutoRefCount) {
DeclRefExpr *declRef = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts());
if (declRef && isa<VarDecl>(declRef->getDecl())) {
VarDecl *var = cast<VarDecl>(declRef->getDecl());
// Use the normal diagnostic if it's pseudo-__strong but the
// user actually wrote 'const'.
if (var->isARCPseudoStrong() &&
(!var->getTypeSourceInfo() ||
!var->getTypeSourceInfo()->getType().isConstQualified())) {
// There are three pseudo-strong cases:
// - self
ObjCMethodDecl *method = S.getCurMethodDecl();
if (method && var == method->getSelfDecl()) {
DiagID = method->isClassMethod()
? diag::err_typecheck_arc_assign_self_class_method
: diag::err_typecheck_arc_assign_self;
// - Objective-C externally_retained attribute.
} else if (var->hasAttr<ObjCExternallyRetainedAttr>() ||
isa<ParmVarDecl>(var)) {
DiagID = diag::err_typecheck_arc_assign_externally_retained;
// - fast enumeration variables
} else {
DiagID = diag::err_typecheck_arr_assign_enumeration;
}
SourceRange Assign;
if (Loc != OrigLoc)
Assign = SourceRange(OrigLoc, OrigLoc);
S.Diag(Loc, DiagID) << E->getSourceRange() << Assign;
// We need to preserve the AST regardless, so the migration
// tool can do its job.
return false;
}
}
}
// If none of the special cases above are triggered, then this is a
// simple const assignment.
if (DiagID == 0) {
DiagnoseConstAssignment(S, E, Loc);
return true;
}
break;
case Expr::MLV_ConstAddrSpace:
DiagnoseConstAssignment(S, E, Loc);
return true;
case Expr::MLV_ConstQualifiedField:
DiagnoseRecursiveConstFields(S, E, Loc);
return true;
case Expr::MLV_ArrayType:
case Expr::MLV_ArrayTemporary:
DiagID = diag::err_typecheck_array_not_modifiable_lvalue;
NeedType = true;
break;
case Expr::MLV_NotObjectType:
DiagID = diag::err_typecheck_non_object_not_modifiable_lvalue;
NeedType = true;
break;
case Expr::MLV_LValueCast:
DiagID = diag::err_typecheck_lvalue_casts_not_supported;
break;
case Expr::MLV_Valid:
llvm_unreachable("did not take early return for MLV_Valid");
case Expr::MLV_InvalidExpression:
case Expr::MLV_MemberFunction:
case Expr::MLV_ClassTemporary:
DiagID = diag::err_typecheck_expression_not_modifiable_lvalue;
break;
case Expr::MLV_IncompleteType:
case Expr::MLV_IncompleteVoidType:
return S.RequireCompleteType(Loc, E->getType(),
diag::err_typecheck_incomplete_type_not_modifiable_lvalue, E);
case Expr::MLV_DuplicateVectorComponents:
DiagID = diag::err_typecheck_duplicate_vector_components_not_mlvalue;
break;
case Expr::MLV_NoSetterProperty:
llvm_unreachable("readonly properties should be processed differently");
case Expr::MLV_InvalidMessageExpression:
DiagID = diag::err_readonly_message_assignment;
break;
case Expr::MLV_SubObjCPropertySetting:
DiagID = diag::err_no_subobject_property_setting;
break;
}
SourceRange Assign;
if (Loc != OrigLoc)
Assign = SourceRange(OrigLoc, OrigLoc);
if (NeedType)
S.Diag(Loc, DiagID) << E->getType() << E->getSourceRange() << Assign;
else
S.Diag(Loc, DiagID) << E->getSourceRange() << Assign;
return true;
}
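/// Warn about "identity" field assignments such as (illustrative)
/// "this->x = this->x;" in C++ or "self->ivar = self->ivar;" in Objective-C,
/// which usually indicate that the source object was forgotten in a copy.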
static void CheckIdentityFieldAssignment(Expr *LHSExpr, Expr *RHSExpr,
SourceLocation Loc,
Sema &Sema) {
if (Sema.inTemplateInstantiation())
return;
if (Sema.isUnevaluatedContext())
return;
if (Loc.isInvalid() || Loc.isMacroID())
return;
if (LHSExpr->getExprLoc().isMacroID() || RHSExpr->getExprLoc().isMacroID())
return;
// C / C++ fields
MemberExpr *ML = dyn_cast<MemberExpr>(LHSExpr);
MemberExpr *MR = dyn_cast<MemberExpr>(RHSExpr);
if (ML && MR) {
if (!(isa<CXXThisExpr>(ML->getBase()) && isa<CXXThisExpr>(MR->getBase())))
return;
const ValueDecl *LHSDecl =
cast<ValueDecl>(ML->getMemberDecl()->getCanonicalDecl());
const ValueDecl *RHSDecl =
cast<ValueDecl>(MR->getMemberDecl()->getCanonicalDecl());
if (LHSDecl != RHSDecl)
return;
if (LHSDecl->getType().isVolatileQualified())
return;
if (const ReferenceType *RefTy = LHSDecl->getType()->getAs<ReferenceType>())
if (RefTy->getPointeeType().isVolatileQualified())
return;
Sema.Diag(Loc, diag::warn_identity_field_assign) << 0;
}
// Objective-C instance variables
ObjCIvarRefExpr *OL = dyn_cast<ObjCIvarRefExpr>(LHSExpr);
ObjCIvarRefExpr *OR = dyn_cast<ObjCIvarRefExpr>(RHSExpr);
if (OL && OR && OL->getDecl() == OR->getDecl()) {
DeclRefExpr *RL = dyn_cast<DeclRefExpr>(OL->getBase()->IgnoreImpCasts());
DeclRefExpr *RR = dyn_cast<DeclRefExpr>(OR->getBase()->IgnoreImpCasts());
if (RL && RR && RL->getDecl() == RR->getDecl())
Sema.Diag(Loc, diag::warn_identity_field_assign) << 1;
}
}
// C99 6.5.16.1
QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS,
SourceLocation Loc,
QualType CompoundType,
BinaryOperatorKind Opc) {
assert(!LHSExpr->hasPlaceholderType(BuiltinType::PseudoObject));
// Verify that LHS is a modifiable lvalue, and emit error if not.
if (CheckForModifiableLvalue(LHSExpr, Loc, *this))
return QualType();
QualType LHSType = LHSExpr->getType();
QualType RHSType = CompoundType.isNull() ? RHS.get()->getType() :
CompoundType;
// OpenCL v1.2 s6.1.1.1 p2:
// The half data type can only be used to declare a pointer to a buffer that
// contains half values
if (getLangOpts().OpenCL &&
!getOpenCLOptions().isAvailableOption("cl_khr_fp16", getLangOpts()) &&
LHSType->isHalfType()) {
Diag(Loc, diag::err_opencl_half_load_store) << 1
<< LHSType.getUnqualifiedType();
return QualType();
}
// WebAssembly tables can't be used on RHS of an assignment expression.
if (RHSType->isWebAssemblyTableType()) {
Diag(Loc, diag::err_wasm_table_art) << 0;
return QualType();
}
AssignConvertType ConvTy;
if (CompoundType.isNull()) {
Expr *RHSCheck = RHS.get();
CheckIdentityFieldAssignment(LHSExpr, RHSCheck, Loc, *this);
QualType LHSTy(LHSType);
ConvTy = CheckSingleAssignmentConstraints(LHSTy, RHS);
if (RHS.isInvalid())
return QualType();
// Special case of NSObject attributes on c-style pointer types.
if (ConvTy == IncompatiblePointer &&
((Context.isObjCNSObjectType(LHSType) &&
RHSType->isObjCObjectPointerType()) ||
(Context.isObjCNSObjectType(RHSType) &&
LHSType->isObjCObjectPointerType())))
ConvTy = Compatible;
if (ConvTy == Compatible &&
LHSType->isObjCObjectType())
Diag(Loc, diag::err_objc_object_assignment)
<< LHSType;
// If the RHS is a unary plus or minus, check to see if the = and + are
// right next to each other. If so, the user may have typo'd "x =+ 4"
// instead of "x += 4".
if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(RHSCheck))
RHSCheck = ICE->getSubExpr();
if (UnaryOperator *UO = dyn_cast<UnaryOperator>(RHSCheck)) {
if ((UO->getOpcode() == UO_Plus || UO->getOpcode() == UO_Minus) &&
Loc.isFileID() && UO->getOperatorLoc().isFileID() &&
// Only if the two operators are exactly adjacent.
Loc.getLocWithOffset(1) == UO->getOperatorLoc() &&
// And there is a space or other character before the subexpr of the
// unary +/-. We don't want to warn on "x=-1".
Loc.getLocWithOffset(2) != UO->getSubExpr()->getBeginLoc() &&
UO->getSubExpr()->getBeginLoc().isFileID()) {
Diag(Loc, diag::warn_not_compound_assign)
<< (UO->getOpcode() == UO_Plus ? "+" : "-")
<< SourceRange(UO->getOperatorLoc(), UO->getOperatorLoc());
}
}
if (ConvTy == Compatible) {
if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong) {
// Warn about retain cycles where a block captures the LHS, but
// not if the LHS is a simple variable into which the block is
// being stored...unless that variable can be captured by reference!
const Expr *InnerLHS = LHSExpr->IgnoreParenCasts();
const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(InnerLHS);
if (!DRE || DRE->getDecl()->hasAttr<BlocksAttr>())
ObjC().checkRetainCycles(LHSExpr, RHS.get());
}
if (LHSType.getObjCLifetime() == Qualifiers::OCL_Strong ||
LHSType.isNonWeakInMRRWithObjCWeak(Context)) {
// It is safe to assign a weak reference into a strong variable.
// Although this code can still have problems:
// id x = self.weakProp;
// id y = self.weakProp;
// we do not warn, to avoid warning spuriously when 'x' and 'y' are on separate
// paths through the function. This should be revisited if
// -Wrepeated-use-of-weak is made flow-sensitive.
// For ObjCWeak only, we do not warn if the assign is to a non-weak
// variable, which will be valid for the current autorelease scope.
if (!Diags.isIgnored(diag::warn_arc_repeated_use_of_weak,
RHS.get()->getBeginLoc()))
getCurFunction()->markSafeWeakUse(RHS.get());
} else if (getLangOpts().ObjCAutoRefCount || getLangOpts().ObjCWeak) {
checkUnsafeExprAssigns(Loc, LHSExpr, RHS.get());
}
}
} else {
// Compound assignment "x += y"
ConvTy = CheckAssignmentConstraints(Loc, LHSType, RHSType);
}
if (DiagnoseAssignmentResult(ConvTy, Loc, LHSType, RHSType,
RHS.get(), AA_Assigning))
return QualType();
CheckForNullPointerDereference(*this, LHSExpr);
AssignedEntity AE{LHSExpr};
checkExprLifetime(*this, AE, RHS.get());
if (getLangOpts().CPlusPlus20 && LHSType.isVolatileQualified()) {
if (CompoundType.isNull()) {
// C++2a [expr.ass]p5:
// A simple-assignment whose left operand is of a volatile-qualified
// type is deprecated unless the assignment is either a discarded-value
// expression or an unevaluated operand
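// e.g. (illustrative): "volatile int v; v = 1;" as a full statement is a
// discarded-value expression and fine, but "int i = (v = 1);" is deprecated.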
ExprEvalContexts.back().VolatileAssignmentLHSs.push_back(LHSExpr);
}
}
// C11 6.5.16p3: The type of an assignment expression is the type the left
// operand would have after lvalue conversion.
// C11 6.3.2.1p2: ...this is called lvalue conversion. If the lvalue has
// qualified type, the value has the unqualified version of the type of the
// lvalue; additionally, if the lvalue has atomic type, the value has the
// non-atomic version of the type of the lvalue.
// C++ 5.17p1: the type of the assignment expression is that of its left
// operand.
return getLangOpts().CPlusPlus ? LHSType : LHSType.getAtomicUnqualifiedType();
}
// Scenarios to ignore if expression E is:
// 1. an explicit cast expression into void
// 2. a function call expression that returns void
static bool IgnoreCommaOperand(const Expr *E, const ASTContext &Context) {
E = E->IgnoreParens();
if (const CastExpr *CE = dyn_cast<CastExpr>(E)) {
if (CE->getCastKind() == CK_ToVoid) {
return true;
}
// static_cast<void> on a dependent type will not show up as CK_ToVoid.
if (CE->getCastKind() == CK_Dependent && E->getType()->isVoidType() &&
CE->getSubExpr()->getType()->isDependentType()) {
return true;
}
}
if (const auto *CE = dyn_cast<CallExpr>(E))
return CE->getCallReturnType(Context)->isVoidType();
return false;
}
void Sema::DiagnoseCommaOperator(const Expr *LHS, SourceLocation Loc) {
// No warnings in macros
if (Loc.isMacroID())
return;
// Don't warn in template instantiations.
if (inTemplateInstantiation())
return;
// Scope isn't fine-grained enough to explicitly list the specific cases, so
// instead, skip more than needed, then call back into here with the
// CommaVisitor in SemaStmt.cpp.
// The listed locations are the initialization and increment portions
// of a for loop. The additional checks are on the condition of
// if statements, do/while loops, and for loops.
// Differences in scope flags for C89 mode require the extra logic.
const unsigned ForIncrementFlags =
getLangOpts().C99 || getLangOpts().CPlusPlus
? Scope::ControlScope | Scope::ContinueScope | Scope::BreakScope
: Scope::ContinueScope | Scope::BreakScope;
const unsigned ForInitFlags = Scope::ControlScope | Scope::DeclScope;
const unsigned ScopeFlags = getCurScope()->getFlags();
if ((ScopeFlags & ForIncrementFlags) == ForIncrementFlags ||
(ScopeFlags & ForInitFlags) == ForInitFlags)
return;
// If there are multiple comma operators used together, get the RHS of the
// last comma operator as the LHS.
while (const BinaryOperator *BO = dyn_cast<BinaryOperator>(LHS)) {
if (BO->getOpcode() != BO_Comma)
break;
LHS = BO->getRHS();
}
// Only certain expressions on the LHS are allowed to suppress the warning.
if (IgnoreCommaOperand(LHS, Context))
return;
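// e.g. (illustrative): "i = (f(), g());" warns here when f() does not return
// void; the fix-it below suggests wrapping the LHS as "static_cast<void>(f())"
// (or "(void)(f())" in C).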
Diag(Loc, diag::warn_comma_operator);
Diag(LHS->getBeginLoc(), diag::note_cast_to_void)
<< LHS->getSourceRange()
<< FixItHint::CreateInsertion(LHS->getBeginLoc(),
LangOpts.CPlusPlus ? "static_cast<void>("
: "(void)(")
<< FixItHint::CreateInsertion(PP.getLocForEndOfToken(LHS->getEndLoc()),
")");
}
// C99 6.5.17
static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS,
SourceLocation Loc) {
LHS = S.CheckPlaceholderExpr(LHS.get());
RHS = S.CheckPlaceholderExpr(RHS.get());
if (LHS.isInvalid() || RHS.isInvalid())
return QualType();
// C's comma performs lvalue conversion (C99 6.3.2.1) on both its
// operands, but not unary promotions.
// C++'s comma does not do any conversions at all (C++ [expr.comma]p1).
// So we treat the LHS as an ignored value, and in C++ we allow the
// containing site to determine what should be done with the RHS.
LHS = S.IgnoredValueConversions(LHS.get());
if (LHS.isInvalid())
return QualType();
S.DiagnoseUnusedExprResult(LHS.get(), diag::warn_unused_comma_left_operand);
if (!S.getLangOpts().CPlusPlus) {
RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get());
if (RHS.isInvalid())
return QualType();
if (!RHS.get()->getType()->isVoidType())
S.RequireCompleteType(Loc, RHS.get()->getType(),
diag::err_incomplete_type);
}
if (!S.getDiagnostics().isIgnored(diag::warn_comma_operator, Loc))
S.DiagnoseCommaOperator(LHS.get(), Loc);
return RHS.get()->getType();
}
/// CheckIncrementDecrementOperand - unlike most "Check" methods, this routine
/// doesn't need to call UsualUnaryConversions or UsualArithmeticConversions.
static QualType CheckIncrementDecrementOperand(Sema &S, Expr *Op,
ExprValueKind &VK,
ExprObjectKind &OK,
SourceLocation OpLoc, bool IsInc,
bool IsPrefix) {
QualType ResType = Op->getType();
// Atomic types can be used for increment / decrement where the non-atomic
// versions can, so ignore the _Atomic() specifier for the purpose of
// checking.
if (const AtomicType *ResAtomicType = ResType->getAs<AtomicType>())
ResType = ResAtomicType->getValueType();
assert(!ResType.isNull() && "no type for increment/decrement expression");
if (S.getLangOpts().CPlusPlus && ResType->isBooleanType()) {
// Decrement of bool is not allowed.
if (!IsInc) {
S.Diag(OpLoc, diag::err_decrement_bool) << Op->getSourceRange();
return QualType();
}
// Increment of bool sets it to true, but is deprecated.
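// e.g. (illustrative): "bool b = false; ++b;" sets 'b' to true but is
// diagnosed (and is only accepted as an extension in C++17, where increment
// of bool was removed).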
S.Diag(OpLoc, S.getLangOpts().CPlusPlus17 ? diag::ext_increment_bool
: diag::warn_increment_bool)
<< Op->getSourceRange();
} else if (S.getLangOpts().CPlusPlus && ResType->isEnumeralType()) {
// Error on enum increments and decrements in C++ mode
S.Diag(OpLoc, diag::err_increment_decrement_enum) << IsInc << ResType;
return QualType();
} else if (ResType->isRealType()) {
// OK!
} else if (ResType->isPointerType()) {
// C99 6.5.2.4p2, 6.5.6p2
if (!checkArithmeticOpPointerOperand(S, OpLoc, Op))
return QualType();
} else if (ResType->isObjCObjectPointerType()) {
// On modern runtimes, ObjC pointer arithmetic is forbidden.
// Otherwise, we just need a complete type.
if (checkArithmeticIncompletePointerType(S, OpLoc, Op) ||
checkArithmeticOnObjCPointer(S, OpLoc, Op))
return QualType();
} else if (ResType->isAnyComplexType()) {
// C99 does not support ++/-- on complex types; we allow it as an extension.
S.Diag(OpLoc, S.getLangOpts().C2y ? diag::warn_c2y_compat_increment_complex
: diag::ext_c2y_increment_complex)
<< IsInc << Op->getSourceRange();
} else if (ResType->isPlaceholderType()) {
ExprResult PR = S.CheckPlaceholderExpr(Op);
if (PR.isInvalid()) return QualType();
return CheckIncrementDecrementOperand(S, PR.get(), VK, OK, OpLoc,
IsInc, IsPrefix);
} else if (S.getLangOpts().AltiVec && ResType->isVectorType()) {
// OK! ( C/C++ Language Extensions for CBEA(Version 2.6) 10.3 )
} else if (S.getLangOpts().ZVector && ResType->isVectorType() &&
(ResType->castAs<VectorType>()->getVectorKind() !=
VectorKind::AltiVecBool)) {
// The z vector extensions allow ++ and -- for non-bool vectors.
} else if (S.getLangOpts().OpenCL && ResType->isVectorType() &&
ResType->castAs<VectorType>()->getElementType()->isIntegerType()) {
// OpenCL V1.2 6.3 says dec/inc ops operate on integer vector types.
} else {
S.Diag(OpLoc, diag::err_typecheck_illegal_increment_decrement)
<< ResType << int(IsInc) << Op->getSourceRange();
return QualType();
}
// At this point, we know we have a real, complex or pointer type.
// Now make sure the operand is a modifiable lvalue.
if (CheckForModifiableLvalue(Op, OpLoc, S))
return QualType();
if (S.getLangOpts().CPlusPlus20 && ResType.isVolatileQualified()) {
// C++2a [expr.pre.inc]p1, [expr.post.inc]p1:
// An operand with volatile-qualified type is deprecated
S.Diag(OpLoc, diag::warn_deprecated_increment_decrement_volatile)
<< IsInc << ResType;
}
// In C++, a prefix increment is the same type as the operand. Otherwise
// (in C, or with postfix), the result is the unqualified type of the
// operand.
if (IsPrefix && S.getLangOpts().CPlusPlus) {
VK = VK_LValue;
OK = Op->getObjectKind();
return ResType;
} else {
VK = VK_PRValue;
return ResType.getUnqualifiedType();
}
}
/// getPrimaryDecl - Helper function for CheckAddressOfOperand().
/// This routine allows us to typecheck complex/recursive expressions
/// where the declaration is needed for type checking. We only need to
/// handle cases when the expression references a function designator
/// or is an lvalue. Here are some examples:
/// - &(x) => x
/// - &*****f => f for f a function designator.
/// - &s.xx => s
/// - &s.zz[1].yy -> s, if zz is an array
/// - *(x + 1) -> x, if x is an array
/// - &"123"[2] -> 0
/// - & __real__ x -> x
///
/// FIXME: We don't recurse to the RHS of a comma, nor handle pointers to
/// members.
static ValueDecl *getPrimaryDecl(Expr *E) {
switch (E->getStmtClass()) {
case Stmt::DeclRefExprClass:
return cast<DeclRefExpr>(E)->getDecl();
case Stmt::MemberExprClass:
// If this is an arrow operator, the address is an offset from
// the base's value, so the object the base refers to is
// irrelevant.
if (cast<MemberExpr>(E)->isArrow())
return nullptr;
// Otherwise, the expression refers to a part of the base
return getPrimaryDecl(cast<MemberExpr>(E)->getBase());
case Stmt::ArraySubscriptExprClass: {
// FIXME: This code shouldn't be necessary! We should catch the implicit
// promotion of register arrays earlier.
Expr* Base = cast<ArraySubscriptExpr>(E)->getBase();
if (ImplicitCastExpr* ICE = dyn_cast<ImplicitCastExpr>(Base)) {
if (ICE->getSubExpr()->getType()->isArrayType())
return getPrimaryDecl(ICE->getSubExpr());
}
return nullptr;
}
case Stmt::UnaryOperatorClass: {
UnaryOperator *UO = cast<UnaryOperator>(E);
switch(UO->getOpcode()) {
case UO_Real:
case UO_Imag:
case UO_Extension:
return getPrimaryDecl(UO->getSubExpr());
default:
return nullptr;
}
}
case Stmt::ParenExprClass:
return getPrimaryDecl(cast<ParenExpr>(E)->getSubExpr());
case Stmt::ImplicitCastExprClass:
// If the result of an implicit cast is an l-value, we care about
// the sub-expression; otherwise, the result here doesn't matter.
return getPrimaryDecl(cast<ImplicitCastExpr>(E)->getSubExpr());
case Stmt::CXXUuidofExprClass:
return cast<CXXUuidofExpr>(E)->getGuidDecl();
default:
return nullptr;
}
}
namespace {
enum {
AO_Bit_Field = 0,
AO_Vector_Element = 1,
AO_Property_Expansion = 2,
AO_Register_Variable = 3,
AO_Matrix_Element = 4,
AO_No_Error = 5
};
}
/// Diagnose invalid operand for address of operations.
///
/// \param Type The type of operand which cannot have its address taken.
static void diagnoseAddressOfInvalidType(Sema &S, SourceLocation Loc,
Expr *E, unsigned Type) {
S.Diag(Loc, diag::err_typecheck_address_of) << Type << E->getSourceRange();
}
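/// Diagnose ill-formed ways of taking the address of a member function,
/// e.g. (illustrative): "&(A::foo)" (parenthesized) or a plain "&foo" inside
/// A; the fix-it below inserts the missing "A::" qualifier where possible.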
bool Sema::CheckUseOfCXXMethodAsAddressOfOperand(SourceLocation OpLoc,
const Expr *Op,
const CXXMethodDecl *MD) {
const auto *DRE = cast<DeclRefExpr>(Op->IgnoreParens());
if (Op != DRE)
return Diag(OpLoc, diag::err_parens_pointer_member_function)
<< Op->getSourceRange();
// Taking the address of a dtor is illegal per C++ [class.dtor]p2.
if (isa<CXXDestructorDecl>(MD))
return Diag(OpLoc, diag::err_typecheck_addrof_dtor)
<< DRE->getSourceRange();
if (DRE->getQualifier())
return false;
if (MD->getParent()->getName().empty())
return Diag(OpLoc, diag::err_unqualified_pointer_member_function)
<< DRE->getSourceRange();
SmallString<32> Str;
StringRef Qual = (MD->getParent()->getName() + "::").toStringRef(Str);
return Diag(OpLoc, diag::err_unqualified_pointer_member_function)
<< DRE->getSourceRange()
<< FixItHint::CreateInsertion(DRE->getSourceRange().getBegin(), Qual);
}
QualType Sema::CheckAddressOfOperand(ExprResult &OrigOp, SourceLocation OpLoc) {
if (const BuiltinType *PTy = OrigOp.get()->getType()->getAsPlaceholderType()){
if (PTy->getKind() == BuiltinType::Overload) {
Expr *E = OrigOp.get()->IgnoreParens();
if (!isa<OverloadExpr>(E)) {
assert(cast<UnaryOperator>(E)->getOpcode() == UO_AddrOf);
Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof_addrof_function)
<< OrigOp.get()->getSourceRange();
return QualType();
}
OverloadExpr *Ovl = cast<OverloadExpr>(E);
if (isa<UnresolvedMemberExpr>(Ovl))
if (!ResolveSingleFunctionTemplateSpecialization(Ovl)) {
Diag(OpLoc, diag::err_invalid_form_pointer_member_function)
<< OrigOp.get()->getSourceRange();
return QualType();
}
return Context.OverloadTy;
}
if (PTy->getKind() == BuiltinType::UnknownAny)
return Context.UnknownAnyTy;
if (PTy->getKind() == BuiltinType::BoundMember) {
Diag(OpLoc, diag::err_invalid_form_pointer_member_function)
<< OrigOp.get()->getSourceRange();
return QualType();
}
OrigOp = CheckPlaceholderExpr(OrigOp.get());
if (OrigOp.isInvalid()) return QualType();
}
if (OrigOp.get()->isTypeDependent())
return Context.DependentTy;
assert(!OrigOp.get()->hasPlaceholderType());
// Make sure to ignore parentheses in subsequent checks
Expr *op = OrigOp.get()->IgnoreParens();
// In OpenCL, captures for blocks called as lambda functions
// are located in the private address space. Blocks used in
// enqueue_kernel can be located in a different address space
// depending on the vendor implementation. We therefore prevent
// taking the address of a capture to avoid invalid AS casts.
if (LangOpts.OpenCL) {
auto* VarRef = dyn_cast<DeclRefExpr>(op);
if (VarRef && VarRef->refersToEnclosingVariableOrCapture()) {
Diag(op->getExprLoc(), diag::err_opencl_taking_address_capture);
return QualType();
}
}
if (getLangOpts().C99) {
// Implement C99-only parts of addressof rules.
if (UnaryOperator* uOp = dyn_cast<UnaryOperator>(op)) {
if (uOp->getOpcode() == UO_Deref)
// Per C99 6.5.3.2, the address of a deref always returns a valid result
// (assuming the deref expression is valid).
return uOp->getSubExpr()->getType();
}
// Technically, there should be a check for array subscript
// expressions here, but the result of one is always an lvalue anyway.
}
ValueDecl *dcl = getPrimaryDecl(op);
if (auto *FD = dyn_cast_or_null<FunctionDecl>(dcl))
if (!checkAddressOfFunctionIsAvailable(FD, /*Complain=*/true,
op->getBeginLoc()))
return QualType();
Expr::LValueClassification lval = op->ClassifyLValue(Context);
unsigned AddressOfError = AO_No_Error;
if (lval == Expr::LV_ClassTemporary || lval == Expr::LV_ArrayTemporary) {
bool sfinae = (bool)isSFINAEContext();
Diag(OpLoc, isSFINAEContext() ? diag::err_typecheck_addrof_temporary
: diag::ext_typecheck_addrof_temporary)
<< op->getType() << op->getSourceRange();
if (sfinae)
return QualType();
// Materialize the temporary as an lvalue so that we can take its address.
OrigOp = op =
CreateMaterializeTemporaryExpr(op->getType(), OrigOp.get(), true);
} else if (isa<ObjCSelectorExpr>(op)) {
return Context.getPointerType(op->getType());
} else if (lval == Expr::LV_MemberFunction) {
// If it's an instance method, make a member pointer.
// The expression must have exactly the form &A::foo.
// If the underlying expression isn't a decl ref, give up.
if (!isa<DeclRefExpr>(op)) {
Diag(OpLoc, diag::err_invalid_form_pointer_member_function)
<< OrigOp.get()->getSourceRange();
return QualType();
}
DeclRefExpr *DRE = cast<DeclRefExpr>(op);
CXXMethodDecl *MD = cast<CXXMethodDecl>(DRE->getDecl());
CheckUseOfCXXMethodAsAddressOfOperand(OpLoc, OrigOp.get(), MD);
QualType MPTy = Context.getMemberPointerType(
op->getType(), Context.getTypeDeclType(MD->getParent()).getTypePtr());
if (getLangOpts().PointerAuthCalls && MD->isVirtual() &&
!isUnevaluatedContext() && !MPTy->isDependentType()) {
// When pointer authentication is enabled, argument and return types of
// virtual member functions must be complete. This is because virtual
// member function pointers are implemented using virtual dispatch
// thunks and the thunks cannot be emitted if the argument or return
// types are incomplete.
auto ReturnOrParamTypeIsIncomplete = [&](QualType T,
SourceLocation DeclRefLoc,
SourceLocation RetArgTypeLoc) {
if (RequireCompleteType(DeclRefLoc, T, diag::err_incomplete_type)) {
Diag(DeclRefLoc,
diag::note_ptrauth_virtual_function_pointer_incomplete_arg_ret);
Diag(RetArgTypeLoc,
diag::note_ptrauth_virtual_function_incomplete_arg_ret_type)
<< T;
return true;
}
return false;
};
QualType RetTy = MD->getReturnType();
bool IsIncomplete =
!RetTy->isVoidType() &&
ReturnOrParamTypeIsIncomplete(
RetTy, OpLoc, MD->getReturnTypeSourceRange().getBegin());
for (auto *PVD : MD->parameters())
IsIncomplete |= ReturnOrParamTypeIsIncomplete(PVD->getType(), OpLoc,
PVD->getBeginLoc());
if (IsIncomplete)
return QualType();
}
// Under the MS ABI, lock down the inheritance model now.
if (Context.getTargetInfo().getCXXABI().isMicrosoft())
(void)isCompleteType(OpLoc, MPTy);
return MPTy;
} else if (lval != Expr::LV_Valid && lval != Expr::LV_IncompleteVoidType) {
// C99 6.5.3.2p1
// The operand must be either an l-value or a function designator
if (!op->getType()->isFunctionType()) {
// Use a special diagnostic for loads from property references.
if (isa<PseudoObjectExpr>(op)) {
AddressOfError = AO_Property_Expansion;
} else {
Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof)
<< op->getType() << op->getSourceRange();
return QualType();
}
} else if (const auto *DRE = dyn_cast<DeclRefExpr>(op)) {
if (const auto *MD = dyn_cast_or_null<CXXMethodDecl>(DRE->getDecl()))
CheckUseOfCXXMethodAsAddressOfOperand(OpLoc, OrigOp.get(), MD);
}
} else if (op->getObjectKind() == OK_BitField) { // C99 6.5.3.2p1
// The operand cannot be a bit-field
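// e.g. (illustrative): "struct S { int b : 3; } s; &s.b;" is an error.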
AddressOfError = AO_Bit_Field;
} else if (op->getObjectKind() == OK_VectorComponent) {
// The operand cannot be an element of a vector
AddressOfError = AO_Vector_Element;
} else if (op->getObjectKind() == OK_MatrixComponent) {
// The operand cannot be an element of a matrix.
AddressOfError = AO_Matrix_Element;
} else if (dcl) { // C99 6.5.3.2p1
// We have an lvalue with a decl. Make sure the decl is not declared
// with the register storage-class specifier.
if (const VarDecl *vd = dyn_cast<VarDecl>(dcl)) {
// in C++ it is not an error to take the address of a register
// variable (C++03 7.1.1p3)
if (vd->getStorageClass() == SC_Register &&
!getLangOpts().CPlusPlus) {
AddressOfError = AO_Register_Variable;
}
} else if (isa<MSPropertyDecl>(dcl)) {
AddressOfError = AO_Property_Expansion;
} else if (isa<FunctionTemplateDecl>(dcl)) {
return Context.OverloadTy;
} else if (isa<FieldDecl>(dcl) || isa<IndirectFieldDecl>(dcl)) {
// Okay: we can take the address of a field.
// Could be a pointer to member, though, if there is an explicit
// scope qualifier for the class.
// [C++26] [expr.prim.id.general]
// If an id-expression E denotes a non-static non-type member
// of some class C [...] and if E is a qualified-id, E is
// not the un-parenthesized operand of the unary & operator [...]
// the id-expression is transformed into a class member access expression.
if (isa<DeclRefExpr>(op) && cast<DeclRefExpr>(op)->getQualifier() &&
!isa<ParenExpr>(OrigOp.get())) {
DeclContext *Ctx = dcl->getDeclContext();
if (Ctx && Ctx->isRecord()) {
if (dcl->getType()->isReferenceType()) {
Diag(OpLoc,
diag::err_cannot_form_pointer_to_member_of_reference_type)
<< dcl->getDeclName() << dcl->getType();
return QualType();
}
while (cast<RecordDecl>(Ctx)->isAnonymousStructOrUnion())
Ctx = Ctx->getParent();
QualType MPTy = Context.getMemberPointerType(
op->getType(),
Context.getTypeDeclType(cast<RecordDecl>(Ctx)).getTypePtr());
// Under the MS ABI, lock down the inheritance model now.
if (Context.getTargetInfo().getCXXABI().isMicrosoft())
(void)isCompleteType(OpLoc, MPTy);
return MPTy;
}
}
} else if (!isa<FunctionDecl, NonTypeTemplateParmDecl, BindingDecl,
MSGuidDecl, UnnamedGlobalConstantDecl>(dcl))
llvm_unreachable("Unknown/unexpected decl type");
}
if (AddressOfError != AO_No_Error) {
diagnoseAddressOfInvalidType(*this, OpLoc, op, AddressOfError);
return QualType();
}
if (lval == Expr::LV_IncompleteVoidType) {
// Taking the address of a void variable is technically illegal, but we
// allow it in cases which are otherwise valid.
// Example: "extern void x; void* y = &x;".
Diag(OpLoc, diag::ext_typecheck_addrof_void) << op->getSourceRange();
}
// If the operand has type "type", the result has type "pointer to type".
if (op->getType()->isObjCObjectType())
return Context.getObjCObjectPointerType(op->getType());
// Cannot take the address of WebAssembly references or tables.
if (Context.getTargetInfo().getTriple().isWasm()) {
QualType OpTy = op->getType();
if (OpTy.isWebAssemblyReferenceType()) {
Diag(OpLoc, diag::err_wasm_ca_reference)
<< 1 << OrigOp.get()->getSourceRange();
return QualType();
}
if (OpTy->isWebAssemblyTableType()) {
Diag(OpLoc, diag::err_wasm_table_pr)
<< 1 << OrigOp.get()->getSourceRange();
return QualType();
}
}
CheckAddressOfPackedMember(op);
return Context.getPointerType(op->getType());
}
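/// Remember a nonnull parameter that is modified in the function body, e.g.
/// (illustrative) "p = nullptr;" on a parameter covered by
/// __attribute__((nonnull)), so later diagnostics about comparisons of 'p'
/// against null can take the modification into account.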
static void RecordModifiableNonNullParam(Sema &S, const Expr *Exp) {
const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Exp);
if (!DRE)
return;
const Decl *D = DRE->getDecl();
if (!D)
return;
const ParmVarDecl *Param = dyn_cast<ParmVarDecl>(D);
if (!Param)
return;
if (const FunctionDecl* FD = dyn_cast<FunctionDecl>(Param->getDeclContext()))
if (!FD->hasAttr<NonNullAttr>() && !Param->hasAttr<NonNullAttr>())
return;
if (FunctionScopeInfo *FD = S.getCurFunction())
FD->ModifiedNonNullParams.insert(Param);
}
/// CheckIndirectionOperand - Type check unary indirection (prefix '*').
static QualType CheckIndirectionOperand(Sema &S, Expr *Op, ExprValueKind &VK,
SourceLocation OpLoc,
bool IsAfterAmp = false) {
ExprResult ConvResult = S.UsualUnaryConversions(Op);
if (ConvResult.isInvalid())
return QualType();
Op = ConvResult.get();
QualType OpTy = Op->getType();
QualType Result;
if (isa<CXXReinterpretCastExpr>(Op)) {
QualType OpOrigType = Op->IgnoreParenCasts()->getType();
S.CheckCompatibleReinterpretCast(OpOrigType, OpTy, /*IsDereference*/true,
Op->getSourceRange());
}
if (const PointerType *PT = OpTy->getAs<PointerType>())
{
Result = PT->getPointeeType();
}
else if (const ObjCObjectPointerType *OPT =
OpTy->getAs<ObjCObjectPointerType>())
Result = OPT->getPointeeType();
else {
ExprResult PR = S.CheckPlaceholderExpr(Op);
if (PR.isInvalid()) return QualType();
if (PR.get() != Op)
return CheckIndirectionOperand(S, PR.get(), VK, OpLoc);
}
if (Result.isNull()) {
S.Diag(OpLoc, diag::err_typecheck_indirection_requires_pointer)
<< OpTy << Op->getSourceRange();
return QualType();
}
if (Result->isVoidType()) {
// C++ [expr.unary.op]p1:
// [...] the expression to which [the unary * operator] is applied shall
// be a pointer to an object type, or a pointer to a function type
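// e.g. (illustrative): "void *p; *p;" is an error in C++ and an extension
// warning in C (suppressed for the "&*p" pattern in C99 and later).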
LangOptions LO = S.getLangOpts();
if (LO.CPlusPlus)
S.Diag(OpLoc, diag::err_typecheck_indirection_through_void_pointer_cpp)
<< OpTy << Op->getSourceRange();
else if (!(LO.C99 && IsAfterAmp) && !S.isUnevaluatedContext())
S.Diag(OpLoc, diag::ext_typecheck_indirection_through_void_pointer)
<< OpTy << Op->getSourceRange();
}
// Dereferences are usually l-values...
VK = VK_LValue;
// ...except that certain expressions are never l-values in C.
if (!S.getLangOpts().CPlusPlus && Result.isCForbiddenLValueType())
VK = VK_PRValue;
return Result;
}
BinaryOperatorKind Sema::ConvertTokenKindToBinaryOpcode(tok::TokenKind Kind) {
BinaryOperatorKind Opc;
switch (Kind) {
default: llvm_unreachable("Unknown binop!");
case tok::periodstar: Opc = BO_PtrMemD; break;
case tok::arrowstar: Opc = BO_PtrMemI; break;
case tok::star: Opc = BO_Mul; break;
case tok::slash: Opc = BO_Div; break;
case tok::percent: Opc = BO_Rem; break;
case tok::plus: Opc = BO_Add; break;
case tok::minus: Opc = BO_Sub; break;
case tok::lessless: Opc = BO_Shl; break;
case tok::greatergreater: Opc = BO_Shr; break;
case tok::lessequal: Opc = BO_LE; break;
case tok::less: Opc = BO_LT; break;
case tok::greaterequal: Opc = BO_GE; break;
case tok::greater: Opc = BO_GT; break;
case tok::exclaimequal: Opc = BO_NE; break;
case tok::equalequal: Opc = BO_EQ; break;
case tok::spaceship: Opc = BO_Cmp; break;
case tok::amp: Opc = BO_And; break;
case tok::caret: Opc = BO_Xor; break;
case tok::pipe: Opc = BO_Or; break;
case tok::ampamp: Opc = BO_LAnd; break;
case tok::pipepipe: Opc = BO_LOr; break;
case tok::equal: Opc = BO_Assign; break;
case tok::starequal: Opc = BO_MulAssign; break;
case tok::slashequal: Opc = BO_DivAssign; break;
case tok::percentequal: Opc = BO_RemAssign; break;
case tok::plusequal: Opc = BO_AddAssign; break;
case tok::minusequal: Opc = BO_SubAssign; break;
case tok::lesslessequal: Opc = BO_ShlAssign; break;
case tok::greatergreaterequal: Opc = BO_ShrAssign; break;
case tok::ampequal: Opc = BO_AndAssign; break;
case tok::caretequal: Opc = BO_XorAssign; break;
case tok::pipeequal: Opc = BO_OrAssign; break;
case tok::comma: Opc = BO_Comma; break;
}
return Opc;
}
static inline UnaryOperatorKind ConvertTokenKindToUnaryOpcode(
tok::TokenKind Kind) {
UnaryOperatorKind Opc;
switch (Kind) {
default: llvm_unreachable("Unknown unary op!");
case tok::plusplus: Opc = UO_PreInc; break;
case tok::minusminus: Opc = UO_PreDec; break;
case tok::amp: Opc = UO_AddrOf; break;
case tok::star: Opc = UO_Deref; break;
case tok::plus: Opc = UO_Plus; break;
case tok::minus: Opc = UO_Minus; break;
case tok::tilde: Opc = UO_Not; break;
case tok::exclaim: Opc = UO_LNot; break;
case tok::kw___real: Opc = UO_Real; break;
case tok::kw___imag: Opc = UO_Imag; break;
case tok::kw___extension__: Opc = UO_Extension; break;
}
return Opc;
}
const FieldDecl *
Sema::getSelfAssignmentClassMemberCandidate(const ValueDecl *SelfAssigned) {
// Explore the case for adding 'this->' to the LHS of a self assignment, very
// common for setters.
// struct A {
// int X;
// -void setX(int X) { X = X; }
// +void setX(int X) { this->X = X; }
// };
// Only consider parameters for self assignment fixes.
if (!isa<ParmVarDecl>(SelfAssigned))
return nullptr;
const auto *Method =
dyn_cast_or_null<CXXMethodDecl>(getCurFunctionDecl(true));
if (!Method)
return nullptr;
const CXXRecordDecl *Parent = Method->getParent();
// In theory this is fixable if the lambda explicitly captures this, but
// that's added complexity that's rarely going to be used.
if (Parent->isLambda())
return nullptr;
// FIXME: Use an actual Lookup operation instead of just traversing fields
// in order to get base class fields.
auto Field =
llvm::find_if(Parent->fields(),
[Name(SelfAssigned->getDeclName())](const FieldDecl *F) {
return F->getDeclName() == Name;
});
return (Field != Parent->field_end()) ? *Field : nullptr;
}
/// DiagnoseSelfAssignment - Emits a warning if a value is assigned to itself.
/// This warning is suppressed in the event of macro expansions.
static void DiagnoseSelfAssignment(Sema &S, Expr *LHSExpr, Expr *RHSExpr,
SourceLocation OpLoc, bool IsBuiltin) {
if (S.inTemplateInstantiation())
return;
if (S.isUnevaluatedContext())
return;
if (OpLoc.isInvalid() || OpLoc.isMacroID())
return;
LHSExpr = LHSExpr->IgnoreParenImpCasts();
RHSExpr = RHSExpr->IgnoreParenImpCasts();
const DeclRefExpr *LHSDeclRef = dyn_cast<DeclRefExpr>(LHSExpr);
const DeclRefExpr *RHSDeclRef = dyn_cast<DeclRefExpr>(RHSExpr);
if (!LHSDeclRef || !RHSDeclRef ||
LHSDeclRef->getLocation().isMacroID() ||
RHSDeclRef->getLocation().isMacroID())
return;
const ValueDecl *LHSDecl =
cast<ValueDecl>(LHSDeclRef->getDecl()->getCanonicalDecl());
const ValueDecl *RHSDecl =
cast<ValueDecl>(RHSDeclRef->getDecl()->getCanonicalDecl());
if (LHSDecl != RHSDecl)
return;
if (LHSDecl->getType().isVolatileQualified())
return;
if (const ReferenceType *RefTy = LHSDecl->getType()->getAs<ReferenceType>())
if (RefTy->getPointeeType().isVolatileQualified())
return;
auto Diag = S.Diag(OpLoc, IsBuiltin ? diag::warn_self_assignment_builtin
: diag::warn_self_assignment_overloaded)
<< LHSDeclRef->getType() << LHSExpr->getSourceRange()
<< RHSExpr->getSourceRange();
if (const FieldDecl *SelfAssignField =
S.getSelfAssignmentClassMemberCandidate(RHSDecl))
Diag << 1 << SelfAssignField
<< FixItHint::CreateInsertion(LHSDeclRef->getBeginLoc(), "this->");
else
Diag << 0;
}
/// Check if a bitwise-& is performed on an Objective-C pointer. This
/// is usually indicative of introspection within the Objective-C pointer.
static void checkObjCPointerIntrospection(Sema &S, ExprResult &L, ExprResult &R,
SourceLocation OpLoc) {
if (!S.getLangOpts().ObjC)
return;
const Expr *ObjCPointerExpr = nullptr, *OtherExpr = nullptr;
const Expr *LHS = L.get();
const Expr *RHS = R.get();
if (LHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) {
ObjCPointerExpr = LHS;
OtherExpr = RHS;
}
else if (RHS->IgnoreParenCasts()->getType()->isObjCObjectPointerType()) {
ObjCPointerExpr = RHS;
OtherExpr = LHS;
}
// This warning is deliberately made very specific to reduce false
// positives with logic that uses '&' for hashing. This logic mainly
// looks for code trying to introspect into tagged pointers, which
// code should generally never do.
if (ObjCPointerExpr && isa<IntegerLiteral>(OtherExpr->IgnoreParenCasts())) {
unsigned Diag = diag::warn_objc_pointer_masking;
// Determine if we are introspecting the result of performSelectorXXX.
const Expr *Ex = ObjCPointerExpr->IgnoreParenCasts();
// Special case messages to -performSelector and friends, which
// can return non-pointer values boxed in a pointer value.
// Some clients may wish to silence warnings in this subcase.
if (const ObjCMessageExpr *ME = dyn_cast<ObjCMessageExpr>(Ex)) {
Selector S = ME->getSelector();
StringRef SelArg0 = S.getNameForSlot(0);
if (SelArg0.starts_with("performSelector"))
Diag = diag::warn_objc_pointer_masking_performSelector;
}
S.Diag(OpLoc, Diag)
<< ObjCPointerExpr->getSourceRange();
}
}
static NamedDecl *getDeclFromExpr(Expr *E) {
if (!E)
return nullptr;
if (auto *DRE = dyn_cast<DeclRefExpr>(E))
return DRE->getDecl();
if (auto *ME = dyn_cast<MemberExpr>(E))
return ME->getMemberDecl();
if (auto *IRE = dyn_cast<ObjCIvarRefExpr>(E))
return IRE->getDecl();
return nullptr;
}
// This helper function promotes a binary operator's operands (which are of a
// half vector type) to a vector of floats and then truncates the result to
// a vector of either half or short.
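// For example (illustrative), a half4 addition is emitted as: convert both
// operands to float4, add, then truncate the float4 result back to half4.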
static ExprResult convertHalfVecBinOp(Sema &S, ExprResult LHS, ExprResult RHS,
BinaryOperatorKind Opc, QualType ResultTy,
ExprValueKind VK, ExprObjectKind OK,
bool IsCompAssign, SourceLocation OpLoc,
FPOptionsOverride FPFeatures) {
auto &Context = S.getASTContext();
assert((isVector(ResultTy, Context.HalfTy) ||
isVector(ResultTy, Context.ShortTy)) &&
"Result must be a vector of half or short");
assert(isVector(LHS.get()->getType(), Context.HalfTy) &&
isVector(RHS.get()->getType(), Context.HalfTy) &&
"both operands expected to be a half vector");
RHS = convertVector(RHS.get(), Context.FloatTy, S);
QualType BinOpResTy = RHS.get()->getType();
// If Opc is a comparison, ResultType is a vector of shorts. In that case,
// change BinOpResTy to a vector of ints.
if (isVector(ResultTy, Context.ShortTy))
BinOpResTy = S.GetSignedVectorType(BinOpResTy);
if (IsCompAssign)
return CompoundAssignOperator::Create(Context, LHS.get(), RHS.get(), Opc,
ResultTy, VK, OK, OpLoc, FPFeatures,
BinOpResTy, BinOpResTy);
LHS = convertVector(LHS.get(), Context.FloatTy, S);
auto *BO = BinaryOperator::Create(Context, LHS.get(), RHS.get(), Opc,
BinOpResTy, VK, OK, OpLoc, FPFeatures);
return convertVector(BO, ResultTy->castAs<VectorType>()->getElementType(), S);
}
static std::pair<ExprResult, ExprResult>
CorrectDelayedTyposInBinOp(Sema &S, BinaryOperatorKind Opc, Expr *LHSExpr,
Expr *RHSExpr) {
ExprResult LHS = LHSExpr, RHS = RHSExpr;
if (!S.Context.isDependenceAllowed()) {
// C cannot handle TypoExpr nodes on either side of a binop because it
// doesn't handle dependent types properly, so make sure any TypoExprs have
// been dealt with before checking the operands.
LHS = S.CorrectDelayedTyposInExpr(LHS);
RHS = S.CorrectDelayedTyposInExpr(
RHS, /*InitDecl=*/nullptr, /*RecoverUncorrectedTypos=*/false,
[Opc, LHS](Expr *E) {
if (Opc != BO_Assign)
return ExprResult(E);
// Avoid correcting the RHS to the same Expr as the LHS.
Decl *D = getDeclFromExpr(E);
return (D && D == getDeclFromExpr(LHS.get())) ? ExprError() : E;
});
}
return std::make_pair(LHS, RHS);
}
/// Returns true if conversion between vectors of halves and vectors of floats
/// is needed.
static bool needsConversionOfHalfVec(bool OpRequiresConversion, ASTContext &Ctx,
Expr *E0, Expr *E1 = nullptr) {
if (!OpRequiresConversion || Ctx.getLangOpts().NativeHalfType ||
Ctx.getTargetInfo().useFP16ConversionIntrinsics())
return false;
auto HasVectorOfHalfType = [&Ctx](Expr *E) {
QualType Ty = E->IgnoreImplicit()->getType();
// Don't promote half precision neon vectors like float16x4_t in arm_neon.h
// to vectors of floats. Although the element type of the vectors is __fp16,
// the vectors shouldn't be treated as storage-only types. See the
// discussion here: https://reviews.llvm.org/rG825235c140e7
if (const VectorType *VT = Ty->getAs<VectorType>()) {
if (VT->getVectorKind() == VectorKind::Neon)
return false;
return VT->getElementType().getCanonicalType() == Ctx.HalfTy;
}
return false;
};
return HasVectorOfHalfType(E0) && (!E1 || HasVectorOfHalfType(E1));
}
ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
BinaryOperatorKind Opc,
Expr *LHSExpr, Expr *RHSExpr) {
if (getLangOpts().CPlusPlus11 && isa<InitListExpr>(RHSExpr)) {
// The syntax only allows initializer lists on the RHS of assignment,
// so we don't need to worry about accepting invalid code for
// non-assignment operators.
// C++11 5.17p9:
// The meaning of x = {v} [...] is that of x = T(v) [...]. The meaning
// of x = {} is x = T().
InitializationKind Kind = InitializationKind::CreateDirectList(
RHSExpr->getBeginLoc(), RHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
InitializedEntity Entity =
InitializedEntity::InitializeTemporary(LHSExpr->getType());
InitializationSequence InitSeq(*this, Entity, Kind, RHSExpr);
ExprResult Init = InitSeq.Perform(*this, Entity, Kind, RHSExpr);
if (Init.isInvalid())
return Init;
RHSExpr = Init.get();
}
ExprResult LHS = LHSExpr, RHS = RHSExpr;
QualType ResultTy; // Result type of the binary operator.
// The following two variables are used for compound assignment operators
QualType CompLHSTy; // Type of LHS after promotions for computation
QualType CompResultTy; // Type of computation result
ExprValueKind VK = VK_PRValue;
ExprObjectKind OK = OK_Ordinary;
bool ConvertHalfVec = false;
std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr, RHSExpr);
if (!LHS.isUsable() || !RHS.isUsable())
return ExprError();
if (getLangOpts().OpenCL) {
QualType LHSTy = LHSExpr->getType();
QualType RHSTy = RHSExpr->getType();
// OpenCL C v2.0 s6.13.11.1 allows atomic variables to be initialized by
// the ATOMIC_VAR_INIT macro.
if (LHSTy->isAtomicType() || RHSTy->isAtomicType()) {
SourceRange SR(LHSExpr->getBeginLoc(), RHSExpr->getEndLoc());
if (BO_Assign == Opc)
Diag(OpLoc, diag::err_opencl_atomic_init) << 0 << SR;
else
ResultTy = InvalidOperands(OpLoc, LHS, RHS);
return ExprError();
}
// OpenCL special types - image, sampler, pipe, and blocks are to be used
// only with builtin functions and therefore should be disallowed here.
if (LHSTy->isImageType() || RHSTy->isImageType() ||
LHSTy->isSamplerT() || RHSTy->isSamplerT() ||
LHSTy->isPipeType() || RHSTy->isPipeType() ||
LHSTy->isBlockPointerType() || RHSTy->isBlockPointerType()) {
ResultTy = InvalidOperands(OpLoc, LHS, RHS);
return ExprError();
}
}
checkTypeSupport(LHSExpr->getType(), OpLoc, /*ValueDecl*/ nullptr);
checkTypeSupport(RHSExpr->getType(), OpLoc, /*ValueDecl*/ nullptr);
switch (Opc) {
case BO_Assign:
ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, QualType(), Opc);
if (getLangOpts().CPlusPlus &&
LHS.get()->getObjectKind() != OK_ObjCProperty) {
VK = LHS.get()->getValueKind();
OK = LHS.get()->getObjectKind();
}
if (!ResultTy.isNull()) {
DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc, true);
DiagnoseSelfMove(LHS.get(), RHS.get(), OpLoc);
// Avoid copying a block to the heap if the block is assigned to a local
// auto variable that is declared in the same scope as the block. This
// optimization is unsafe if the local variable is declared in an outer
// scope. For example:
//
// BlockTy b;
// {
// b = ^{...};
// }
// // It is unsafe to invoke the block here if it wasn't copied to the
// // heap.
// b();
if (auto *BE = dyn_cast<BlockExpr>(RHS.get()->IgnoreParens()))
if (auto *DRE = dyn_cast<DeclRefExpr>(LHS.get()->IgnoreParens()))
if (auto *VD = dyn_cast<VarDecl>(DRE->getDecl()))
if (VD->hasLocalStorage() && getCurScope()->isDeclScope(VD))
BE->getBlockDecl()->setCanAvoidCopyToHeap();
if (LHS.get()->getType().hasNonTrivialToPrimitiveCopyCUnion())
checkNonTrivialCUnion(LHS.get()->getType(), LHS.get()->getExprLoc(),
NTCUC_Assignment, NTCUK_Copy);
}
RecordModifiableNonNullParam(*this, LHS.get());
break;
case BO_PtrMemD:
case BO_PtrMemI:
ResultTy = CheckPointerToMemberOperands(LHS, RHS, VK, OpLoc,
Opc == BO_PtrMemI);
break;
case BO_Mul:
case BO_Div:
ConvertHalfVec = true;
ResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, false,
Opc == BO_Div);
break;
case BO_Rem:
ResultTy = CheckRemainderOperands(LHS, RHS, OpLoc);
break;
case BO_Add:
ConvertHalfVec = true;
ResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc);
break;
case BO_Sub:
ConvertHalfVec = true;
ResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc);
break;
case BO_Shl:
case BO_Shr:
ResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc);
break;
case BO_LE:
case BO_LT:
case BO_GE:
case BO_GT:
ConvertHalfVec = true;
ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc);
if (const auto *BI = dyn_cast<BinaryOperator>(LHSExpr);
BI && BI->isComparisonOp())
Diag(OpLoc, diag::warn_consecutive_comparison);
break;
case BO_EQ:
case BO_NE:
ConvertHalfVec = true;
ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc);
break;
case BO_Cmp:
ConvertHalfVec = true;
ResultTy = CheckCompareOperands(LHS, RHS, OpLoc, Opc);
assert(ResultTy.isNull() || ResultTy->getAsCXXRecordDecl());
break;
case BO_And:
checkObjCPointerIntrospection(*this, LHS, RHS, OpLoc);
[[fallthrough]];
case BO_Xor:
case BO_Or:
ResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc);
break;
case BO_LAnd:
case BO_LOr:
ConvertHalfVec = true;
ResultTy = CheckLogicalOperands(LHS, RHS, OpLoc, Opc);
break;
case BO_MulAssign:
case BO_DivAssign:
ConvertHalfVec = true;
CompResultTy = CheckMultiplyDivideOperands(LHS, RHS, OpLoc, true,
Opc == BO_DivAssign);
CompLHSTy = CompResultTy;
if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
ResultTy =
CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc);
break;
case BO_RemAssign:
CompResultTy = CheckRemainderOperands(LHS, RHS, OpLoc, true);
CompLHSTy = CompResultTy;
if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
ResultTy =
CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc);
break;
case BO_AddAssign:
ConvertHalfVec = true;
CompResultTy = CheckAdditionOperands(LHS, RHS, OpLoc, Opc, &CompLHSTy);
if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
ResultTy =
CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc);
break;
case BO_SubAssign:
ConvertHalfVec = true;
CompResultTy = CheckSubtractionOperands(LHS, RHS, OpLoc, &CompLHSTy);
if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
ResultTy =
CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc);
break;
case BO_ShlAssign:
case BO_ShrAssign:
CompResultTy = CheckShiftOperands(LHS, RHS, OpLoc, Opc, true);
CompLHSTy = CompResultTy;
if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
ResultTy =
CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc);
break;
case BO_AndAssign:
case BO_OrAssign: // fallthrough
DiagnoseSelfAssignment(*this, LHS.get(), RHS.get(), OpLoc, true);
[[fallthrough]];
case BO_XorAssign:
CompResultTy = CheckBitwiseOperands(LHS, RHS, OpLoc, Opc);
CompLHSTy = CompResultTy;
if (!CompResultTy.isNull() && !LHS.isInvalid() && !RHS.isInvalid())
ResultTy =
CheckAssignmentOperands(LHS.get(), RHS, OpLoc, CompResultTy, Opc);
break;
case BO_Comma:
ResultTy = CheckCommaOperands(*this, LHS, RHS, OpLoc);
if (getLangOpts().CPlusPlus && !RHS.isInvalid()) {
VK = RHS.get()->getValueKind();
OK = RHS.get()->getObjectKind();
}
break;
}
if (ResultTy.isNull() || LHS.isInvalid() || RHS.isInvalid())
return ExprError();
// Some of the binary operations require promoting operands of half vector to
// float vectors and truncating the result back to half vector. For now, we do
// this only when HalfArgsAndReturn is set (that is, when the target is arm or
// arm64).
assert(
(Opc == BO_Comma || isVector(RHS.get()->getType(), Context.HalfTy) ==
isVector(LHS.get()->getType(), Context.HalfTy)) &&
"both sides are half vectors or neither sides are");
ConvertHalfVec =
needsConversionOfHalfVec(ConvertHalfVec, Context, LHS.get(), RHS.get());
// Check for array bounds violations for both sides of the BinaryOperator
CheckArrayAccess(LHS.get());
CheckArrayAccess(RHS.get());
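// Rewrite direct isa assignments (illustrative): "obj->isa = cls;" gets a
// fix-it to "object_setClass(obj, cls)" when object_setClass is declared.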
if (const ObjCIsaExpr *OISA = dyn_cast<ObjCIsaExpr>(LHS.get()->IgnoreParenCasts())) {
NamedDecl *ObjectSetClass = LookupSingleName(TUScope,
&Context.Idents.get("object_setClass"),
SourceLocation(), LookupOrdinaryName);
if (ObjectSetClass && isa<ObjCIsaExpr>(LHS.get())) {
SourceLocation RHSLocEnd = getLocForEndOfToken(RHS.get()->getEndLoc());
Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign)
<< FixItHint::CreateInsertion(LHS.get()->getBeginLoc(),
"object_setClass(")
<< FixItHint::CreateReplacement(SourceRange(OISA->getOpLoc(), OpLoc),
",")
<< FixItHint::CreateInsertion(RHSLocEnd, ")");
}
else
Diag(LHS.get()->getExprLoc(), diag::warn_objc_isa_assign);
}
else if (const ObjCIvarRefExpr *OIRE =
dyn_cast<ObjCIvarRefExpr>(LHS.get()->IgnoreParenCasts()))
DiagnoseDirectIsaAccess(*this, OIRE, OpLoc, RHS.get());
// Opc is not a compound assignment if CompResultTy is null.
if (CompResultTy.isNull()) {
if (ConvertHalfVec)
return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, false,
OpLoc, CurFPFeatureOverrides());
return BinaryOperator::Create(Context, LHS.get(), RHS.get(), Opc, ResultTy,
VK, OK, OpLoc, CurFPFeatureOverrides());
}
// Handle compound assignments.
if (getLangOpts().CPlusPlus && LHS.get()->getObjectKind() !=
OK_ObjCProperty) {
VK = VK_LValue;
OK = LHS.get()->getObjectKind();
}
// The LHS is not converted to the result type for fixed-point compound
// assignment as the common type is computed on demand. Reset the CompLHSTy
// to the LHS type we would have gotten after unary conversions.
if (CompResultTy->isFixedPointType())
CompLHSTy = UsualUnaryConversions(LHS.get()).get()->getType();
if (ConvertHalfVec)
return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, true,
OpLoc, CurFPFeatureOverrides());
return CompoundAssignOperator::Create(
Context, LHS.get(), RHS.get(), Opc, ResultTy, VK, OK, OpLoc,
CurFPFeatureOverrides(), CompLHSTy, CompResultTy);
}
/// DiagnoseBitwisePrecedence - Emit a warning when bitwise and comparison
/// operators are mixed in a way that suggests that the programmer forgot that
/// comparison operators have higher precedence. The most typical example of
/// such code is "flags & 0x0020 != 0", which is equivalent to "flags & 1".
static void DiagnoseBitwisePrecedence(Sema &Self, BinaryOperatorKind Opc,
SourceLocation OpLoc, Expr *LHSExpr,
Expr *RHSExpr) {
BinaryOperator *LHSBO = dyn_cast<BinaryOperator>(LHSExpr);
BinaryOperator *RHSBO = dyn_cast<BinaryOperator>(RHSExpr);
// Check that one of the sides is a comparison operator and the other isn't.
bool isLeftComp = LHSBO && LHSBO->isComparisonOp();
bool isRightComp = RHSBO && RHSBO->isComparisonOp();
if (isLeftComp == isRightComp)
return;
// Bitwise operations are sometimes used as eager logical ops.
// Don't diagnose this.
bool isLeftBitwise = LHSBO && LHSBO->isBitwiseOp();
bool isRightBitwise = RHSBO && RHSBO->isBitwiseOp();
if (isLeftBitwise || isRightBitwise)
return;
SourceRange DiagRange = isLeftComp
? SourceRange(LHSExpr->getBeginLoc(), OpLoc)
: SourceRange(OpLoc, RHSExpr->getEndLoc());
StringRef OpStr = isLeftComp ? LHSBO->getOpcodeStr() : RHSBO->getOpcodeStr();
SourceRange ParensRange =
isLeftComp
? SourceRange(LHSBO->getRHS()->getBeginLoc(), RHSExpr->getEndLoc())
: SourceRange(LHSExpr->getBeginLoc(), RHSBO->getLHS()->getEndLoc());
Self.Diag(OpLoc, diag::warn_precedence_bitwise_rel)
<< DiagRange << BinaryOperator::getOpcodeStr(Opc) << OpStr;
SuggestParentheses(Self, OpLoc,
Self.PDiag(diag::note_precedence_silence) << OpStr,
(isLeftComp ? LHSExpr : RHSExpr)->getSourceRange());
SuggestParentheses(Self, OpLoc,
Self.PDiag(diag::note_precedence_bitwise_first)
<< BinaryOperator::getOpcodeStr(Opc),
ParensRange);
}
/// Given a '&&' expr that is inside a '||' one, emit a diagnostic together
/// with a fixit hint that wraps the '&&' expression in parentheses.
static void
EmitDiagnosticForLogicalAndInLogicalOr(Sema &Self, SourceLocation OpLoc,
BinaryOperator *Bop) {
assert(Bop->getOpcode() == BO_LAnd);
Self.Diag(Bop->getOperatorLoc(), diag::warn_logical_and_in_logical_or)
<< Bop->getSourceRange() << OpLoc;
SuggestParentheses(Self, Bop->getOperatorLoc(),
Self.PDiag(diag::note_precedence_silence)
<< Bop->getOpcodeStr(),
Bop->getSourceRange());
}
/// Look for '&&' in the left hand of a '||' expr.
static void DiagnoseLogicalAndInLogicalOrLHS(Sema &S, SourceLocation OpLoc,
Expr *LHSExpr, Expr *RHSExpr) {
if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(LHSExpr)) {
if (Bop->getOpcode() == BO_LAnd) {
// If it's "string_literal && a || b" don't warn since the precedence
// doesn't matter.
if (!isa<StringLiteral>(Bop->getLHS()->IgnoreParenImpCasts()))
return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop);
} else if (Bop->getOpcode() == BO_LOr) {
if (BinaryOperator *RBop = dyn_cast<BinaryOperator>(Bop->getRHS())) {
// If it's "a || b && string_literal || c" we didn't warn earlier for
// "a || b && string_literal", but warn now.
if (RBop->getOpcode() == BO_LAnd &&
isa<StringLiteral>(RBop->getRHS()->IgnoreParenImpCasts()))
return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, RBop);
}
}
}
}
/// Look for '&&' in the right hand of a '||' expr.
static void DiagnoseLogicalAndInLogicalOrRHS(Sema &S, SourceLocation OpLoc,
Expr *LHSExpr, Expr *RHSExpr) {
if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(RHSExpr)) {
if (Bop->getOpcode() == BO_LAnd) {
// If it's "a || b && string_literal" don't warn since the precedence
// doesn't matter.
if (!isa<StringLiteral>(Bop->getRHS()->IgnoreParenImpCasts()))
return EmitDiagnosticForLogicalAndInLogicalOr(S, OpLoc, Bop);
}
}
}
/// Look for bitwise op in the left or right hand of a bitwise op with
/// lower precedence and emit a diagnostic together with a fixit hint that wraps
/// the '&' expression in parentheses.
static void DiagnoseBitwiseOpInBitwiseOp(Sema &S, BinaryOperatorKind Opc,
SourceLocation OpLoc, Expr *SubExpr) {
if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(SubExpr)) {
if (Bop->isBitwiseOp() && Bop->getOpcode() < Opc) {
S.Diag(Bop->getOperatorLoc(), diag::warn_bitwise_op_in_bitwise_op)
<< Bop->getOpcodeStr() << BinaryOperator::getOpcodeStr(Opc)
<< Bop->getSourceRange() << OpLoc;
SuggestParentheses(S, Bop->getOperatorLoc(),
S.PDiag(diag::note_precedence_silence)
<< Bop->getOpcodeStr(),
Bop->getSourceRange());
}
}
}
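/// Diagnose a '+' or '-' inside a shift operand, e.g. (illustrative)
/// "x << 1 + 2", which parses as "x << 3" rather than "(x << 1) + 2".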
static void DiagnoseAdditionInShift(Sema &S, SourceLocation OpLoc,
Expr *SubExpr, StringRef Shift) {
if (BinaryOperator *Bop = dyn_cast<BinaryOperator>(SubExpr)) {
if (Bop->getOpcode() == BO_Add || Bop->getOpcode() == BO_Sub) {
StringRef Op = Bop->getOpcodeStr();
S.Diag(Bop->getOperatorLoc(), diag::warn_addition_in_bitshift)
<< Bop->getSourceRange() << OpLoc << Shift << Op;
SuggestParentheses(S, Bop->getOperatorLoc(),
S.PDiag(diag::note_precedence_silence) << Op,
Bop->getSourceRange());
}
}
}
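/// Diagnose an overloaded shift operator on the left-hand side of a
/// comparison, such as "cout << x == 4", which parses as
/// "(cout << x) == 4" even though "cout << (x == 4)" may have been
/// intended.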
static void DiagnoseShiftCompare(Sema &S, SourceLocation OpLoc,
Expr *LHSExpr, Expr *RHSExpr) {
CXXOperatorCallExpr *OCE = dyn_cast<CXXOperatorCallExpr>(LHSExpr);
if (!OCE)
return;
FunctionDecl *FD = OCE->getDirectCallee();
if (!FD || !FD->isOverloadedOperator())
return;
OverloadedOperatorKind Kind = FD->getOverloadedOperator();
if (Kind != OO_LessLess && Kind != OO_GreaterGreater)
return;
S.Diag(OpLoc, diag::warn_overloaded_shift_in_comparison)
<< LHSExpr->getSourceRange() << RHSExpr->getSourceRange()
<< (Kind == OO_LessLess);
SuggestParentheses(S, OCE->getOperatorLoc(),
S.PDiag(diag::note_precedence_silence)
<< (Kind == OO_LessLess ? "<<" : ">>"),
OCE->getSourceRange());
SuggestParentheses(
S, OpLoc, S.PDiag(diag::note_evaluate_comparison_first),
SourceRange(OCE->getArg(1)->getBeginLoc(), RHSExpr->getEndLoc()));
}
/// DiagnoseBinOpPrecedence - Emit warnings for expressions with tricky
/// precedence.
static void DiagnoseBinOpPrecedence(Sema &Self, BinaryOperatorKind Opc,
SourceLocation OpLoc, Expr *LHSExpr,
Expr *RHSExpr) {
// Diagnose "arg1 'bitwise' arg2 'eq' arg3".
if (BinaryOperator::isBitwiseOp(Opc))
DiagnoseBitwisePrecedence(Self, Opc, OpLoc, LHSExpr, RHSExpr);
// Diagnose "arg1 & arg2 | arg3"
if ((Opc == BO_Or || Opc == BO_Xor) &&
!OpLoc.isMacroID() /* Don't warn in macros. */) {
DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, LHSExpr);
DiagnoseBitwiseOpInBitwiseOp(Self, Opc, OpLoc, RHSExpr);
}
// Warn about arg1 || arg2 && arg3, as GCC 4.3+ does.
// We don't warn for 'assert(a || b && "bad")' since this is safe.
if (Opc == BO_LOr && !OpLoc.isMacroID() /* Don't warn in macros. */) {
DiagnoseLogicalAndInLogicalOrLHS(Self, OpLoc, LHSExpr, RHSExpr);
DiagnoseLogicalAndInLogicalOrRHS(Self, OpLoc, LHSExpr, RHSExpr);
}
if ((Opc == BO_Shl && LHSExpr->getType()->isIntegralType(Self.getASTContext()))
|| Opc == BO_Shr) {
StringRef Shift = BinaryOperator::getOpcodeStr(Opc);
DiagnoseAdditionInShift(Self, OpLoc, LHSExpr, Shift);
DiagnoseAdditionInShift(Self, OpLoc, RHSExpr, Shift);
}
// Warn on overloaded shift operators and comparisons, such as:
// cout << 5 == 4;
if (BinaryOperator::isComparisonOp(Opc))
DiagnoseShiftCompare(Self, OpLoc, LHSExpr, RHSExpr);
}
ExprResult Sema::ActOnBinOp(Scope *S, SourceLocation TokLoc,
tok::TokenKind Kind,
Expr *LHSExpr, Expr *RHSExpr) {
BinaryOperatorKind Opc = ConvertTokenKindToBinaryOpcode(Kind);
assert(LHSExpr && "ActOnBinOp(): missing left expression");
assert(RHSExpr && "ActOnBinOp(): missing right expression");
// Emit warnings for tricky precedence issues, e.g. "bitfield & 0x4 == 0"
DiagnoseBinOpPrecedence(*this, Opc, TokLoc, LHSExpr, RHSExpr);
return BuildBinOp(S, TokLoc, Opc, LHSExpr, RHSExpr);
}
void Sema::LookupBinOp(Scope *S, SourceLocation OpLoc, BinaryOperatorKind Opc,
UnresolvedSetImpl &Functions) {
OverloadedOperatorKind OverOp = BinaryOperator::getOverloadedOperator(Opc);
if (OverOp != OO_None && OverOp != OO_Equal)
LookupOverloadedOperatorName(OverOp, S, Functions);
// In C++20 onwards, we may have a second operator to look up.
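// For example, 'a != b' also looks up 'operator==', and relational
// operators such as 'a < b' also look up 'operator<=>' as rewrite
// candidates.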
if (getLangOpts().CPlusPlus20) {
if (OverloadedOperatorKind ExtraOp = getRewrittenOverloadedOperator(OverOp))
LookupOverloadedOperatorName(ExtraOp, S, Functions);
}
}
/// Build an overloaded binary operator expression in the given scope.
static ExprResult BuildOverloadedBinOp(Sema &S, Scope *Sc, SourceLocation OpLoc,
BinaryOperatorKind Opc,
Expr *LHS, Expr *RHS) {
switch (Opc) {
case BO_Assign:
// In the non-overloaded case, we warn about self-assignment (x = x) for
// both simple assignment and certain compound assignments where algebra
// tells us the operation yields a constant result. When the operator is
// overloaded, we can't do the latter because we don't want to assume that
// those algebraic identities still apply; for example, a path-building
// library might use operator/= to append paths. But it's still reasonable
// to assume that simple assignment is just moving/copying values around
// and so self-assignment is likely a bug.
DiagnoseSelfAssignment(S, LHS, RHS, OpLoc, false);
[[fallthrough]];
case BO_DivAssign:
case BO_RemAssign:
case BO_SubAssign:
case BO_AndAssign:
case BO_OrAssign:
case BO_XorAssign:
CheckIdentityFieldAssignment(LHS, RHS, OpLoc, S);
break;
default:
break;
}
// Find all of the overloaded operators visible from this point.
UnresolvedSet<16> Functions;
S.LookupBinOp(Sc, OpLoc, Opc, Functions);
// Build the (potentially-overloaded, potentially-dependent)
// binary operation.
return S.CreateOverloadedBinOp(OpLoc, Opc, Functions, LHS, RHS);
}
ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
BinaryOperatorKind Opc,
Expr *LHSExpr, Expr *RHSExpr) {
ExprResult LHS, RHS;
std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr, RHSExpr);
if (!LHS.isUsable() || !RHS.isUsable())
return ExprError();
LHSExpr = LHS.get();
RHSExpr = RHS.get();
// We want to end up calling one of SemaPseudoObject::checkAssignment
// (if the LHS is a pseudo-object), BuildOverloadedBinOp (if
// both expressions are overloadable or either is type-dependent),
// or CreateBuiltinBinOp (in any other case). We also want to get
// any placeholder types out of the way.
// Handle pseudo-objects in the LHS.
if (const BuiltinType *pty = LHSExpr->getType()->getAsPlaceholderType()) {
// Assignments with a pseudo-object l-value need special analysis.
if (pty->getKind() == BuiltinType::PseudoObject &&
BinaryOperator::isAssignmentOp(Opc))
return PseudoObject().checkAssignment(S, OpLoc, Opc, LHSExpr, RHSExpr);
// Don't resolve overloads if the other type is overloadable.
if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload) {
// We can't actually test that if we still have a placeholder,
// though. Fortunately, none of the exceptions we see in the
// code below are valid when the LHS is an overload set. Note
// that an overload set can be dependently-typed, but it never
// instantiates to having an overloadable type.
ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr);
if (resolvedRHS.isInvalid()) return ExprError();
RHSExpr = resolvedRHS.get();
if (RHSExpr->isTypeDependent() ||
RHSExpr->getType()->isOverloadableType())
return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
}
// If we're instantiating "a.x < b" or "A::x < b" and 'x' names a function
// template, diagnose the missing 'template' keyword instead of diagnosing
// an invalid use of a bound member function.
//
// Note that "A::x < b" might be valid if 'b' has an overloadable type due
// to C++1z [over.over]/1.4, but we already checked for that case above.
if (Opc == BO_LT && inTemplateInstantiation() &&
(pty->getKind() == BuiltinType::BoundMember ||
pty->getKind() == BuiltinType::Overload)) {
auto *OE = dyn_cast<OverloadExpr>(LHSExpr);
if (OE && !OE->hasTemplateKeyword() && !OE->hasExplicitTemplateArgs() &&
llvm::any_of(OE->decls(), [](NamedDecl *ND) {
return isa<FunctionTemplateDecl>(ND);
})) {
Diag(OE->getQualifier() ? OE->getQualifierLoc().getBeginLoc()
: OE->getNameLoc(),
diag::err_template_kw_missing)
<< OE->getName().getAsString() << "";
return ExprError();
}
}
ExprResult LHS = CheckPlaceholderExpr(LHSExpr);
if (LHS.isInvalid()) return ExprError();
LHSExpr = LHS.get();
}
// Handle pseudo-objects in the RHS.
if (const BuiltinType *pty = RHSExpr->getType()->getAsPlaceholderType()) {
// An overload in the RHS can potentially be resolved by the type
// being assigned to.
if (Opc == BO_Assign && pty->getKind() == BuiltinType::Overload) {
if (getLangOpts().CPlusPlus &&
(LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() ||
LHSExpr->getType()->isOverloadableType()))
return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr);
}
// Don't resolve overloads if the other type is overloadable.
if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload &&
LHSExpr->getType()->isOverloadableType())
return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
ExprResult resolvedRHS = CheckPlaceholderExpr(RHSExpr);
if (!resolvedRHS.isUsable()) return ExprError();
RHSExpr = resolvedRHS.get();
}
if (getLangOpts().CPlusPlus) {
// Otherwise, build an overloaded op if either expression is type-dependent
// or has an overloadable type.
if (LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent() ||
LHSExpr->getType()->isOverloadableType() ||
RHSExpr->getType()->isOverloadableType())
return BuildOverloadedBinOp(*this, S, OpLoc, Opc, LHSExpr, RHSExpr);
}
if (getLangOpts().RecoveryAST &&
(LHSExpr->isTypeDependent() || RHSExpr->isTypeDependent())) {
assert(!getLangOpts().CPlusPlus);
assert((LHSExpr->containsErrors() || RHSExpr->containsErrors()) &&
"Should only occur in error-recovery path.");
if (BinaryOperator::isCompoundAssignmentOp(Opc))
// C11 6.5.16p3:
// An assignment expression has the value of the left operand after the
// assignment, but is not an lvalue.
return CompoundAssignOperator::Create(
Context, LHSExpr, RHSExpr, Opc,
LHSExpr->getType().getUnqualifiedType(), VK_PRValue, OK_Ordinary,
OpLoc, CurFPFeatureOverrides());
QualType ResultType;
switch (Opc) {
case BO_Assign:
ResultType = LHSExpr->getType().getUnqualifiedType();
break;
case BO_LT:
case BO_GT:
case BO_LE:
case BO_GE:
case BO_EQ:
case BO_NE:
case BO_LAnd:
case BO_LOr:
// These operators have a fixed result type regardless of operands.
ResultType = Context.IntTy;
break;
case BO_Comma:
ResultType = RHSExpr->getType();
break;
default:
ResultType = Context.DependentTy;
break;
}
return BinaryOperator::Create(Context, LHSExpr, RHSExpr, Opc, ResultType,
VK_PRValue, OK_Ordinary, OpLoc,
CurFPFeatureOverrides());
}
// Build a built-in binary operation.
return CreateBuiltinBinOp(OpLoc, Opc, LHSExpr, RHSExpr);
}
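/// Helper for computing a UnaryOperator's CanOverflow flag: returns true
/// unless T is a promotable integer type narrower than 'int', since such an
/// operand is promoted to 'int' before the arithmetic is performed.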
static bool isOverflowingIntegerType(ASTContext &Ctx, QualType T) {
if (T.isNull() || T->isDependentType())
return false;
if (!Ctx.isPromotableIntegerType(T))
return true;
return Ctx.getIntWidth(T) >= Ctx.getIntWidth(Ctx.IntTy);
}
ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
UnaryOperatorKind Opc, Expr *InputExpr,
bool IsAfterAmp) {
ExprResult Input = InputExpr;
ExprValueKind VK = VK_PRValue;
ExprObjectKind OK = OK_Ordinary;
QualType resultType;
bool CanOverflow = false;
bool ConvertHalfVec = false;
if (getLangOpts().OpenCL) {
QualType Ty = InputExpr->getType();
// The only legal unary operation for atomics is '&'.
if ((Opc != UO_AddrOf && Ty->isAtomicType()) ||
// OpenCL special types - image, sampler, pipe, and blocks are to be used
// only with builtin functions and therefore should be disallowed here.
(Ty->isImageType() || Ty->isSamplerT() || Ty->isPipeType()
|| Ty->isBlockPointerType())) {
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< InputExpr->getType()
<< Input.get()->getSourceRange());
}
}
if (getLangOpts().HLSL && OpLoc.isValid()) {
if (Opc == UO_AddrOf)
return ExprError(Diag(OpLoc, diag::err_hlsl_operator_unsupported) << 0);
if (Opc == UO_Deref)
return ExprError(Diag(OpLoc, diag::err_hlsl_operator_unsupported) << 1);
}
if (InputExpr->isTypeDependent() &&
InputExpr->getType()->isSpecificBuiltinType(BuiltinType::Dependent)) {
resultType = Context.DependentTy;
} else {
switch (Opc) {
case UO_PreInc:
case UO_PreDec:
case UO_PostInc:
case UO_PostDec:
resultType =
CheckIncrementDecrementOperand(*this, Input.get(), VK, OK, OpLoc,
Opc == UO_PreInc || Opc == UO_PostInc,
Opc == UO_PreInc || Opc == UO_PreDec);
CanOverflow = isOverflowingIntegerType(Context, resultType);
break;
case UO_AddrOf:
resultType = CheckAddressOfOperand(Input, OpLoc);
CheckAddressOfNoDeref(InputExpr);
RecordModifiableNonNullParam(*this, InputExpr);
break;
case UO_Deref: {
Input = DefaultFunctionArrayLvalueConversion(Input.get());
if (Input.isInvalid())
return ExprError();
resultType =
CheckIndirectionOperand(*this, Input.get(), VK, OpLoc, IsAfterAmp);
break;
}
case UO_Plus:
case UO_Minus:
CanOverflow = Opc == UO_Minus &&
isOverflowingIntegerType(Context, Input.get()->getType());
Input = UsualUnaryConversions(Input.get());
if (Input.isInvalid())
return ExprError();
// Unary plus and minus require promoting an operand of half vector to a
// float vector and truncating the result back to a half vector. For now,
// we do this only when HalfArgsAndReturns is set (that is, when the
// target is arm or arm64).
ConvertHalfVec = needsConversionOfHalfVec(true, Context, Input.get());
// If the operand is a half vector, promote it to a float vector.
if (ConvertHalfVec)
Input = convertVector(Input.get(), Context.FloatTy, *this);
resultType = Input.get()->getType();
if (resultType->isArithmeticType()) // C99 6.5.3.3p1
break;
else if (resultType->isVectorType() &&
// The z vector extensions don't allow + or - with bool vectors.
(!Context.getLangOpts().ZVector ||
resultType->castAs<VectorType>()->getVectorKind() !=
VectorKind::AltiVecBool))
break;
else if (resultType->isSveVLSBuiltinType()) // SVE vectors allow + and -
break;
else if (getLangOpts().CPlusPlus && // C++ [expr.unary.op]p6
Opc == UO_Plus && resultType->isPointerType())
break;
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< resultType << Input.get()->getSourceRange());
case UO_Not: // bitwise complement
Input = UsualUnaryConversions(Input.get());
if (Input.isInvalid())
return ExprError();
resultType = Input.get()->getType();
// C99 6.5.3.3p1. We allow complex int and float as a GCC extension.
if (resultType->isComplexType() || resultType->isComplexIntegerType())
// C99 does not support '~' for complex conjugation.
Diag(OpLoc, diag::ext_integer_complement_complex)
<< resultType << Input.get()->getSourceRange();
else if (resultType->hasIntegerRepresentation())
break;
else if (resultType->isExtVectorType() && Context.getLangOpts().OpenCL) {
// OpenCL v1.1 s6.3.f: The bitwise operator not (~) does not operate
// on vector float types.
QualType T = resultType->castAs<ExtVectorType>()->getElementType();
if (!T->isIntegerType())
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< resultType << Input.get()->getSourceRange());
} else {
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< resultType << Input.get()->getSourceRange());
}
break;
case UO_LNot: // logical negation
// Unlike +/-/~, integer promotions aren't done here (C99 6.5.3.3p5).
Input = DefaultFunctionArrayLvalueConversion(Input.get());
if (Input.isInvalid())
return ExprError();
resultType = Input.get()->getType();
// Though we still have to promote half FP to float...
if (resultType->isHalfType() && !Context.getLangOpts().NativeHalfType) {
Input = ImpCastExprToType(Input.get(), Context.FloatTy, CK_FloatingCast)
.get();
resultType = Context.FloatTy;
}
// WebAssembly tables can't be used in unary expressions.
if (resultType->isPointerType() &&
resultType->getPointeeType().isWebAssemblyReferenceType()) {
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< resultType << Input.get()->getSourceRange());
}
if (resultType->isScalarType() && !isScopedEnumerationType(resultType)) {
// C99 6.5.3.3p1: ok, fallthrough;
if (Context.getLangOpts().CPlusPlus) {
// C++03 [expr.unary.op]p8, C++0x [expr.unary.op]p9:
// operand contextually converted to bool.
Input = ImpCastExprToType(Input.get(), Context.BoolTy,
ScalarTypeToBooleanCastKind(resultType));
} else if (Context.getLangOpts().OpenCL &&
Context.getLangOpts().OpenCLVersion < 120) {
// OpenCL v1.1 6.3.h: The logical operator not (!) does not
// operate on scalar float types.
if (!resultType->isIntegerType() && !resultType->isPointerType())
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< resultType << Input.get()->getSourceRange());
}
} else if (resultType->isExtVectorType()) {
if (Context.getLangOpts().OpenCL &&
Context.getLangOpts().getOpenCLCompatibleVersion() < 120) {
// OpenCL v1.1 6.3.h: The logical operator not (!) does not
// operate on vector float types.
QualType T = resultType->castAs<ExtVectorType>()->getElementType();
if (!T->isIntegerType())
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< resultType << Input.get()->getSourceRange());
}
// Vector logical not returns the signed variant of the operand type.
resultType = GetSignedVectorType(resultType);
break;
} else if (Context.getLangOpts().CPlusPlus &&
resultType->isVectorType()) {
const VectorType *VTy = resultType->castAs<VectorType>();
if (VTy->getVectorKind() != VectorKind::Generic)
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< resultType << Input.get()->getSourceRange());
// Vector logical not returns the signed variant of the operand type.
resultType = GetSignedVectorType(resultType);
break;
} else {
return ExprError(Diag(OpLoc, diag::err_typecheck_unary_expr)
<< resultType << Input.get()->getSourceRange());
}
// LNot always has type int. C99 6.5.3.3p5.
// In C++, it's bool. C++ 5.3.1p8
resultType = Context.getLogicalOperationType();
break;
case UO_Real:
case UO_Imag:
resultType = CheckRealImagOperand(*this, Input, OpLoc, Opc == UO_Real);
// _Real maps ordinary l-values into ordinary l-values. _Imag maps
// ordinary complex l-values to ordinary l-values and all other values to
// r-values.
if (Input.isInvalid())
return ExprError();
if (Opc == UO_Real || Input.get()->getType()->isAnyComplexType()) {
if (Input.get()->isGLValue() &&
Input.get()->getObjectKind() == OK_Ordinary)
VK = Input.get()->getValueKind();
} else if (!getLangOpts().CPlusPlus) {
// In C, a volatile scalar is read by __imag. In C++, it is not.
Input = DefaultLvalueConversion(Input.get());
}
break;
case UO_Extension:
resultType = Input.get()->getType();
VK = Input.get()->getValueKind();
OK = Input.get()->getObjectKind();
break;
case UO_Coawait:
// It's unnecessary to represent the pass-through operator co_await in the
// AST; just return the input expression instead.
assert(!Input.get()->getType()->isDependentType() &&
"the co_await expression must be non-dependant before "
"building operator co_await");
return Input;
}
}
if (resultType.isNull() || Input.isInvalid())
return ExprError();
// Check for array bounds violations in the operand of the UnaryOperator,
// except for the '*' and '&' operators that have to be handled specially
// by CheckArrayAccess (as there are special cases like &array[arraysize]
// that are explicitly defined as valid by the standard).
if (Opc != UO_AddrOf && Opc != UO_Deref)
CheckArrayAccess(Input.get());
auto *UO =
UnaryOperator::Create(Context, Input.get(), Opc, resultType, VK, OK,
OpLoc, CanOverflow, CurFPFeatureOverrides());
if (Opc == UO_Deref && UO->getType()->hasAttr(attr::NoDeref) &&
!isa<ArrayType>(UO->getType().getDesugaredType(Context)) &&
!isUnevaluatedContext())
ExprEvalContexts.back().PossibleDerefs.insert(UO);
// Convert the result back to a half vector.
if (ConvertHalfVec)
return convertVector(UO, Context.HalfTy, *this);
return UO;
}
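/// Determine whether E is a qualified reference to a non-static class
/// member, e.g. the operand of '&C::f'. Taking the address of such an
/// expression uses the builtin '&' to form a pointer to member instead of
/// considering an overloaded operator&.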
bool Sema::isQualifiedMemberAccess(Expr *E) {
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
if (!DRE->getQualifier())
return false;
ValueDecl *VD = DRE->getDecl();
if (!VD->isCXXClassMember())
return false;
if (isa<FieldDecl>(VD) || isa<IndirectFieldDecl>(VD))
return true;
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(VD))
return Method->isImplicitObjectMemberFunction();
return false;
}
if (UnresolvedLookupExpr *ULE = dyn_cast<UnresolvedLookupExpr>(E)) {
if (!ULE->getQualifier())
return false;
for (NamedDecl *D : ULE->decls()) {
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
if (Method->isImplicitObjectMemberFunction())
return true;
} else {
// Overload set does not contain methods.
break;
}
}
return false;
}
return false;
}
ExprResult Sema::BuildUnaryOp(Scope *S, SourceLocation OpLoc,
UnaryOperatorKind Opc, Expr *Input,
bool IsAfterAmp) {
// First things first: handle placeholders so that the
// overloaded-operator check considers the right type.
if (const BuiltinType *pty = Input->getType()->getAsPlaceholderType()) {
// Increment and decrement of pseudo-object references.
if (pty->getKind() == BuiltinType::PseudoObject &&
UnaryOperator::isIncrementDecrementOp(Opc))
return PseudoObject().checkIncDec(S, OpLoc, Opc, Input);
// __extension__ is always a builtin operator.
if (Opc == UO_Extension)
return CreateBuiltinUnaryOp(OpLoc, Opc, Input);
// & gets special logic for several kinds of placeholder.
// The builtin code knows what to do.
if (Opc == UO_AddrOf &&
(pty->getKind() == BuiltinType::Overload ||
pty->getKind() == BuiltinType::UnknownAny ||
pty->getKind() == BuiltinType::BoundMember))
return CreateBuiltinUnaryOp(OpLoc, Opc, Input);
// Anything else needs to be handled now.
ExprResult Result = CheckPlaceholderExpr(Input);
if (Result.isInvalid()) return ExprError();
Input = Result.get();
}
if (getLangOpts().CPlusPlus && Input->getType()->isOverloadableType() &&
UnaryOperator::getOverloadedOperator(Opc) != OO_None &&
!(Opc == UO_AddrOf && isQualifiedMemberAccess(Input))) {
// Find all of the overloaded operators visible from this point.
UnresolvedSet<16> Functions;
OverloadedOperatorKind OverOp = UnaryOperator::getOverloadedOperator(Opc);
if (S && OverOp != OO_None)
LookupOverloadedOperatorName(OverOp, S, Functions);
return CreateOverloadedUnaryOp(OpLoc, Opc, Functions, Input);
}
return CreateBuiltinUnaryOp(OpLoc, Opc, Input, IsAfterAmp);
}
ExprResult Sema::ActOnUnaryOp(Scope *S, SourceLocation OpLoc, tok::TokenKind Op,
Expr *Input, bool IsAfterAmp) {
return BuildUnaryOp(S, OpLoc, ConvertTokenKindToUnaryOpcode(Op), Input,
IsAfterAmp);
}
ExprResult Sema::ActOnAddrLabel(SourceLocation OpLoc, SourceLocation LabLoc,
LabelDecl *TheDecl) {
TheDecl->markUsed(Context);
// Create the AST node. The address of a label always has type 'void*'.
auto *Res = new (Context) AddrLabelExpr(
OpLoc, LabLoc, TheDecl, Context.getPointerType(Context.VoidTy));
if (getCurFunction())
getCurFunction()->AddrLabels.push_back(Res);
return Res;
}
void Sema::ActOnStartStmtExpr() {
PushExpressionEvaluationContext(ExprEvalContexts.back().Context);
// Make sure we diagnose jumping into a statement expression.
setFunctionHasBranchProtectedScope();
}
void Sema::ActOnStmtExprError() {
// Note that this function is also called by TreeTransform when leaving a
// StmtExpr scope without rebuilding anything.
DiscardCleanupsInEvaluationContext();
PopExpressionEvaluationContext();
}
ExprResult Sema::ActOnStmtExpr(Scope *S, SourceLocation LPLoc, Stmt *SubStmt,
SourceLocation RPLoc) {
return BuildStmtExpr(LPLoc, SubStmt, RPLoc, getTemplateDepth(S));
}
ExprResult Sema::BuildStmtExpr(SourceLocation LPLoc, Stmt *SubStmt,
SourceLocation RPLoc, unsigned TemplateDepth) {
assert(SubStmt && isa<CompoundStmt>(SubStmt) && "Invalid action invocation!");
CompoundStmt *Compound = cast<CompoundStmt>(SubStmt);
if (hasAnyUnrecoverableErrorsInThisFunction())
DiscardCleanupsInEvaluationContext();
assert(!Cleanup.exprNeedsCleanups() &&
"cleanups within StmtExpr not correctly bound!");
PopExpressionEvaluationContext();
// FIXME: there are a variety of strange constraints to enforce here, for
// example, it is apparently not possible to goto into a statement
// expression. More semantic analysis is needed.
// If there are sub-stmts in the compound stmt, take the type of the last one
// as the type of the stmtexpr.
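// For example, "({ int x = f(); x + 1; })" has type 'int', the type of
// its final expression statement.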
QualType Ty = Context.VoidTy;
bool StmtExprMayBindToTemp = false;
if (!Compound->body_empty()) {
// For GCC compatibility we get the last Stmt excluding trailing NullStmts.
if (const auto *LastStmt =
dyn_cast<ValueStmt>(Compound->getStmtExprResult())) {
if (const Expr *Value = LastStmt->getExprStmt()) {
StmtExprMayBindToTemp = true;
Ty = Value->getType();
}
}
}
// FIXME: Check that expression type is complete/non-abstract; statement
// expressions are not lvalues.
Expr *ResStmtExpr =
new (Context) StmtExpr(Compound, Ty, LPLoc, RPLoc, TemplateDepth);
if (StmtExprMayBindToTemp)
return MaybeBindToTemporary(ResStmtExpr);
return ResStmtExpr;
}
ExprResult Sema::ActOnStmtExprResult(ExprResult ER) {
if (ER.isInvalid())
return ExprError();
// Do function/array conversion on the last expression, but not
// lvalue-to-rvalue conversion. However, the initialization below uses the
// unqualified type.
ER = DefaultFunctionArrayConversion(ER.get());
if (ER.isInvalid())
return ExprError();
Expr *E = ER.get();
if (E->isTypeDependent())
return E;
// In ARC, if the final expression ends in a consume, splice
// the consume out and bind it later. In the alternate case
// (when dealing with a retainable type), the result
// initialization will create a produce. In both cases the
// result will be +1, and we'll need to balance that out with
// a bind.
auto *Cast = dyn_cast<ImplicitCastExpr>(E);
if (Cast && Cast->getCastKind() == CK_ARCConsumeObject)
return Cast->getSubExpr();
// FIXME: Provide a better location for the initialization.
return PerformCopyInitialization(
InitializedEntity::InitializeStmtExprResult(
E->getBeginLoc(), E->getType().getUnqualifiedType()),
SourceLocation(), E);
}
ExprResult Sema::BuildBuiltinOffsetOf(SourceLocation BuiltinLoc,
TypeSourceInfo *TInfo,
ArrayRef<OffsetOfComponent> Components,
SourceLocation RParenLoc) {
QualType ArgTy = TInfo->getType();
bool Dependent = ArgTy->isDependentType();
SourceRange TypeRange = TInfo->getTypeLoc().getLocalSourceRange();
// We must have at least one component that refers to the type, and the first
// one is known to be a field designator. Verify that the ArgTy represents
// a struct/union/class.
if (!Dependent && !ArgTy->isRecordType())
return ExprError(Diag(BuiltinLoc, diag::err_offsetof_record_type)
<< ArgTy << TypeRange);
// The type must be complete per C99 7.17p3 because declaring a variable
// with an incomplete type would be ill-formed.
if (!Dependent
&& RequireCompleteType(BuiltinLoc, ArgTy,
diag::err_offsetof_incomplete_type, TypeRange))
return ExprError();
bool DidWarnAboutNonPOD = false;
QualType CurrentType = ArgTy;
SmallVector<OffsetOfNode, 4> Comps;
SmallVector<Expr*, 4> Exprs;
for (const OffsetOfComponent &OC : Components) {
if (OC.isBrackets) {
// Offset of an array sub-field. TODO: Should we allow vector elements?
if (!CurrentType->isDependentType()) {
const ArrayType *AT = Context.getAsArrayType(CurrentType);
if (!AT)
return ExprError(Diag(OC.LocEnd, diag::err_offsetof_array_type)
<< CurrentType);
CurrentType = AT->getElementType();
} else
CurrentType = Context.DependentTy;
ExprResult IdxRval = DefaultLvalueConversion(static_cast<Expr*>(OC.U.E));
if (IdxRval.isInvalid())
return ExprError();
Expr *Idx = IdxRval.get();
// The expression must be an integral expression.
// FIXME: An integral constant expression?
if (!Idx->isTypeDependent() && !Idx->isValueDependent() &&
!Idx->getType()->isIntegerType())
return ExprError(
Diag(Idx->getBeginLoc(), diag::err_typecheck_subscript_not_integer)
<< Idx->getSourceRange());
// Record this array index.
Comps.push_back(OffsetOfNode(OC.LocStart, Exprs.size(), OC.LocEnd));
Exprs.push_back(Idx);
continue;
}
// Offset of a field.
if (CurrentType->isDependentType()) {
// We have the offset of a field, but we can't look into the dependent
// type. Just record the identifier of the field.
Comps.push_back(OffsetOfNode(OC.LocStart, OC.U.IdentInfo, OC.LocEnd));
CurrentType = Context.DependentTy;
continue;
}
// We need to have a complete type to look into.
if (RequireCompleteType(OC.LocStart, CurrentType,
diag::err_offsetof_incomplete_type))
return ExprError();
// Look for the designated field.
const RecordType *RC = CurrentType->getAs<RecordType>();
if (!RC)
return ExprError(Diag(OC.LocEnd, diag::err_offsetof_record_type)
<< CurrentType);
RecordDecl *RD = RC->getDecl();
// C++ [lib.support.types]p5:
// The macro offsetof accepts a restricted set of type arguments in this
// International Standard. type shall be a POD structure or a POD union
// (clause 9).
// C++11 [support.types]p4:
// If type is not a standard-layout class (Clause 9), the results are
// undefined.
if (CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) {
bool IsSafe = LangOpts.CPlusPlus11 ? CRD->isStandardLayout() : CRD->isPOD();
unsigned DiagID =
LangOpts.CPlusPlus11 ? diag::ext_offsetof_non_standardlayout_type
: diag::ext_offsetof_non_pod_type;
if (!IsSafe && !DidWarnAboutNonPOD && !isUnevaluatedContext()) {
Diag(BuiltinLoc, DiagID)
<< SourceRange(Components[0].LocStart, OC.LocEnd) << CurrentType;
DidWarnAboutNonPOD = true;
}
}
// Look for the field.
LookupResult R(*this, OC.U.IdentInfo, OC.LocStart, LookupMemberName);
LookupQualifiedName(R, RD);
FieldDecl *MemberDecl = R.getAsSingle<FieldDecl>();
IndirectFieldDecl *IndirectMemberDecl = nullptr;
if (!MemberDecl) {
if ((IndirectMemberDecl = R.getAsSingle<IndirectFieldDecl>()))
MemberDecl = IndirectMemberDecl->getAnonField();
}
if (!MemberDecl) {
// Lookup could be ambiguous when looking up a placeholder variable, e.g.
// __builtin_offsetof(S, _).
// In that case we would already have emitted a diagnostic.
if (!R.isAmbiguous())
Diag(BuiltinLoc, diag::err_no_member)
<< OC.U.IdentInfo << RD << SourceRange(OC.LocStart, OC.LocEnd);
return ExprError();
}
// C99 7.17p3:
// (If the specified member is a bit-field, the behavior is undefined.)
//
// We diagnose this as an error.
if (MemberDecl->isBitField()) {
Diag(OC.LocEnd, diag::err_offsetof_bitfield)
<< MemberDecl->getDeclName()
<< SourceRange(BuiltinLoc, RParenLoc);
Diag(MemberDecl->getLocation(), diag::note_bitfield_decl);
return ExprError();
}
RecordDecl *Parent = MemberDecl->getParent();
if (IndirectMemberDecl)
Parent = cast<RecordDecl>(IndirectMemberDecl->getDeclContext());
// If the member was found in a base class, introduce OffsetOfNodes for
// the base class indirections.
CXXBasePaths Paths;
if (IsDerivedFrom(OC.LocStart, CurrentType, Context.getTypeDeclType(Parent),
Paths)) {
if (Paths.getDetectedVirtual()) {
Diag(OC.LocEnd, diag::err_offsetof_field_of_virtual_base)
<< MemberDecl->getDeclName()
<< SourceRange(BuiltinLoc, RParenLoc);
return ExprError();
}
CXXBasePath &Path = Paths.front();
for (const CXXBasePathElement &B : Path)
Comps.push_back(OffsetOfNode(B.Base));
}
if (IndirectMemberDecl) {
for (auto *FI : IndirectMemberDecl->chain()) {
assert(isa<FieldDecl>(FI));
Comps.push_back(OffsetOfNode(OC.LocStart,
cast<FieldDecl>(FI), OC.LocEnd));
}
} else
Comps.push_back(OffsetOfNode(OC.LocStart, MemberDecl, OC.LocEnd));
CurrentType = MemberDecl->getType().getNonReferenceType();
}
return OffsetOfExpr::Create(Context, Context.getSizeType(), BuiltinLoc, TInfo,
Comps, Exprs, RParenLoc);
}
ExprResult Sema::ActOnBuiltinOffsetOf(Scope *S,
SourceLocation BuiltinLoc,
SourceLocation TypeLoc,
ParsedType ParsedArgTy,
ArrayRef<OffsetOfComponent> Components,
SourceLocation RParenLoc) {
TypeSourceInfo *ArgTInfo;
QualType ArgTy = GetTypeFromParser(ParsedArgTy, &ArgTInfo);
if (ArgTy.isNull())
return ExprError();
if (!ArgTInfo)
ArgTInfo = Context.getTrivialTypeSourceInfo(ArgTy, TypeLoc);
return BuildBuiltinOffsetOf(BuiltinLoc, ArgTInfo, Components, RParenLoc);
}
ExprResult Sema::ActOnChooseExpr(SourceLocation BuiltinLoc,
Expr *CondExpr,
Expr *LHSExpr, Expr *RHSExpr,
SourceLocation RPLoc) {
assert((CondExpr && LHSExpr && RHSExpr) && "Missing type argument(s)");
ExprValueKind VK = VK_PRValue;
ExprObjectKind OK = OK_Ordinary;
QualType resType;
bool CondIsTrue = false;
if (CondExpr->isTypeDependent() || CondExpr->isValueDependent()) {
resType = Context.DependentTy;
} else {
// The conditional expression is required to be a constant expression.
llvm::APSInt condEval(32);
ExprResult CondICE = VerifyIntegerConstantExpression(
CondExpr, &condEval, diag::err_typecheck_choose_expr_requires_constant);
if (CondICE.isInvalid())
return ExprError();
CondExpr = CondICE.get();
CondIsTrue = condEval.getZExtValue();
// If the condition is nonzero, the expression has the type, value kind,
// and object kind of LHSExpr; otherwise those of RHSExpr.
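// For example, "__builtin_choose_expr(1, x, y)" behaves exactly like 'x'.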
Expr *ActiveExpr = CondIsTrue ? LHSExpr : RHSExpr;
resType = ActiveExpr->getType();
VK = ActiveExpr->getValueKind();
OK = ActiveExpr->getObjectKind();
}
return new (Context) ChooseExpr(BuiltinLoc, CondExpr, LHSExpr, RHSExpr,
resType, VK, OK, RPLoc, CondIsTrue);
}
//===----------------------------------------------------------------------===//
// Clang Extensions.
//===----------------------------------------------------------------------===//
void Sema::ActOnBlockStart(SourceLocation CaretLoc, Scope *CurScope) {
BlockDecl *Block = BlockDecl::Create(Context, CurContext, CaretLoc);
if (LangOpts.CPlusPlus) {
MangleNumberingContext *MCtx;
Decl *ManglingContextDecl;
std::tie(MCtx, ManglingContextDecl) =
getCurrentMangleNumberContext(Block->getDeclContext());
if (MCtx) {
unsigned ManglingNumber = MCtx->getManglingNumber(Block);
Block->setBlockMangling(ManglingNumber, ManglingContextDecl);
}
}
PushBlockScope(CurScope, Block);
CurContext->addDecl(Block);
if (CurScope)
PushDeclContext(CurScope, Block);
else
CurContext = Block;
getCurBlock()->HasImplicitReturnType = true;
// Enter a new evaluation context to insulate the block from any
// cleanups from the enclosing full-expression.
PushExpressionEvaluationContext(
ExpressionEvaluationContext::PotentiallyEvaluated);
}
void Sema::ActOnBlockArguments(SourceLocation CaretLoc, Declarator &ParamInfo,
Scope *CurScope) {
assert(ParamInfo.getIdentifier() == nullptr &&
"block-id should have no identifier!");
assert(ParamInfo.getContext() == DeclaratorContext::BlockLiteral);
BlockScopeInfo *CurBlock = getCurBlock();
TypeSourceInfo *Sig = GetTypeForDeclarator(ParamInfo);
QualType T = Sig->getType();
// FIXME: We should allow unexpanded parameter packs here, but that would,
// in turn, make the block expression contain unexpanded parameter packs.
if (DiagnoseUnexpandedParameterPack(CaretLoc, Sig, UPPC_Block)) {
// Drop the parameters.
FunctionProtoType::ExtProtoInfo EPI;
EPI.HasTrailingReturn = false;
EPI.TypeQuals.addConst();
T = Context.getFunctionType(Context.DependentTy, std::nullopt, EPI);
Sig = Context.getTrivialTypeSourceInfo(T);
}
// GetTypeForDeclarator always produces a function type for a block
// literal signature. Furthermore, it is always a FunctionProtoType
// unless the function was written with a typedef.
assert(T->isFunctionType() &&
"GetTypeForDeclarator made a non-function block signature");
// Look for an explicit signature in that function type.
FunctionProtoTypeLoc ExplicitSignature;
if ((ExplicitSignature = Sig->getTypeLoc()
.getAsAdjusted<FunctionProtoTypeLoc>())) {
// Check whether that explicit signature was synthesized by
// GetTypeForDeclarator. If so, don't save that as part of the
// written signature.
if (ExplicitSignature.getLocalRangeBegin() ==
ExplicitSignature.getLocalRangeEnd()) {
// This would be much cheaper if we stored TypeLocs instead of
// TypeSourceInfos.
TypeLoc Result = ExplicitSignature.getReturnLoc();
unsigned Size = Result.getFullDataSize();
Sig = Context.CreateTypeSourceInfo(Result.getType(), Size);
Sig->getTypeLoc().initializeFullCopy(Result, Size);
ExplicitSignature = FunctionProtoTypeLoc();
}
}
CurBlock->TheDecl->setSignatureAsWritten(Sig);
CurBlock->FunctionType = T;
const auto *Fn = T->castAs<FunctionType>();
QualType RetTy = Fn->getReturnType();
bool isVariadic =
(isa<FunctionProtoType>(Fn) && cast<FunctionProtoType>(Fn)->isVariadic());
CurBlock->TheDecl->setIsVariadic(isVariadic);
// Context.DependentTy is used as a placeholder for a missing block
// return type. TODO: what should we do with declarators like:
// ^ * { ... }
// If the answer is "apply template argument deduction"....
if (RetTy != Context.DependentTy) {
CurBlock->ReturnType = RetTy;
CurBlock->TheDecl->setBlockMissingReturnType(false);
CurBlock->HasImplicitReturnType = false;
}
// Push block parameters from the declarator if we had them.
SmallVector<ParmVarDecl*, 8> Params;
if (ExplicitSignature) {
for (unsigned I = 0, E = ExplicitSignature.getNumParams(); I != E; ++I) {
ParmVarDecl *Param = ExplicitSignature.getParam(I);
if (Param->getIdentifier() == nullptr && !Param->isImplicit() &&
!Param->isInvalidDecl() && !getLangOpts().CPlusPlus) {
// Diagnose this as an extension in C17 and earlier.
if (!getLangOpts().C23)
Diag(Param->getLocation(), diag::ext_parameter_name_omitted_c23);
}
Params.push_back(Param);
}
// Fake up parameter variables if we have a typedef, like
// ^ fntype { ... }
} else if (const FunctionProtoType *Fn = T->getAs<FunctionProtoType>()) {
for (const auto &I : Fn->param_types()) {
ParmVarDecl *Param = BuildParmVarDeclForTypedef(
CurBlock->TheDecl, ParamInfo.getBeginLoc(), I);
Params.push_back(Param);
}
}
// Set the parameters on the block decl.
if (!Params.empty()) {
CurBlock->TheDecl->setParams(Params);
CheckParmsForFunctionDef(CurBlock->TheDecl->parameters(),
/*CheckParameterNames=*/false);
}
// Finally we can process decl attributes.
ProcessDeclAttributes(CurScope, CurBlock->TheDecl, ParamInfo);
// Put the parameter variables in scope.
for (auto *AI : CurBlock->TheDecl->parameters()) {
AI->setOwningFunction(CurBlock->TheDecl);
// If this has an identifier, add it to the scope stack.
if (AI->getIdentifier()) {
CheckShadow(CurBlock->TheScope, AI);
PushOnScopeChains(AI, CurBlock->TheScope);
}
if (AI->isInvalidDecl())
CurBlock->TheDecl->setInvalidDecl();
}
}
void Sema::ActOnBlockError(SourceLocation CaretLoc, Scope *CurScope) {
// Leave the expression-evaluation context.
DiscardCleanupsInEvaluationContext();
PopExpressionEvaluationContext();
// Pop off CurBlock, handle nested blocks.
PopDeclContext();
PopFunctionScopeInfo();
}
ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
Stmt *Body, Scope *CurScope) {
// If blocks are disabled, emit an error.
if (!LangOpts.Blocks)
Diag(CaretLoc, diag::err_blocks_disable) << LangOpts.OpenCL;
// Leave the expression-evaluation context.
if (hasAnyUnrecoverableErrorsInThisFunction())
DiscardCleanupsInEvaluationContext();
assert(!Cleanup.exprNeedsCleanups() &&
"cleanups within block not correctly bound!");
PopExpressionEvaluationContext();
BlockScopeInfo *BSI = cast<BlockScopeInfo>(FunctionScopes.back());
BlockDecl *BD = BSI->TheDecl;
if (BSI->HasImplicitReturnType)
deduceClosureReturnType(*BSI);
QualType RetTy = Context.VoidTy;
if (!BSI->ReturnType.isNull())
RetTy = BSI->ReturnType;
bool NoReturn = BD->hasAttr<NoReturnAttr>();
QualType BlockTy;
// If the user wrote a function type in some form, try to use that.
if (!BSI->FunctionType.isNull()) {
const FunctionType *FTy = BSI->FunctionType->castAs<FunctionType>();
FunctionType::ExtInfo Ext = FTy->getExtInfo();
if (NoReturn && !Ext.getNoReturn()) Ext = Ext.withNoReturn(true);
// Turn protoless block types into nullary block types.
if (isa<FunctionNoProtoType>(FTy)) {
FunctionProtoType::ExtProtoInfo EPI;
EPI.ExtInfo = Ext;
BlockTy = Context.getFunctionType(RetTy, std::nullopt, EPI);
// Otherwise, if we don't need to change anything about the function type,
// preserve its sugar structure.
} else if (FTy->getReturnType() == RetTy &&
(!NoReturn || FTy->getNoReturnAttr())) {
BlockTy = BSI->FunctionType;
// Otherwise, make the minimal modifications to the function type.
} else {
const FunctionProtoType *FPT = cast<FunctionProtoType>(FTy);
FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
EPI.TypeQuals = Qualifiers();
EPI.ExtInfo = Ext;
BlockTy = Context.getFunctionType(RetTy, FPT->getParamTypes(), EPI);
}
// If we don't have a function type, just build one from nothing.
} else {
FunctionProtoType::ExtProtoInfo EPI;
EPI.ExtInfo = FunctionType::ExtInfo().withNoReturn(NoReturn);
BlockTy = Context.getFunctionType(RetTy, std::nullopt, EPI);
}
DiagnoseUnusedParameters(BD->parameters());
BlockTy = Context.getBlockPointerType(BlockTy);
// If needed, diagnose invalid gotos and switches in the block.
if (getCurFunction()->NeedsScopeChecking() &&
!PP.isCodeCompletionEnabled())
DiagnoseInvalidJumps(cast<CompoundStmt>(Body));
BD->setBody(cast<CompoundStmt>(Body));
if (Body && getCurFunction()->HasPotentialAvailabilityViolations)
DiagnoseUnguardedAvailabilityViolations(BD);
// Try to apply the named return value optimization. We have to check again
// if we can do this, though, because blocks keep return statements around
// to deduce an implicit return type.
if (getLangOpts().CPlusPlus && RetTy->isRecordType() &&
!BD->isDependentContext())
computeNRVO(Body, BSI);
if (RetTy.hasNonTrivialToPrimitiveDestructCUnion() ||
RetTy.hasNonTrivialToPrimitiveCopyCUnion())
checkNonTrivialCUnion(RetTy, BD->getCaretLocation(), NTCUC_FunctionReturn,
NTCUK_Destruct|NTCUK_Copy);
PopDeclContext();
// Set the captured variables on the block.
SmallVector<BlockDecl::Capture, 4> Captures;
for (Capture &Cap : BSI->Captures) {
if (Cap.isInvalid() || Cap.isThisCapture())
continue;
// Cap.getVariable() is always a VarDecl because
// blocks cannot capture structured bindings or other ValueDecl kinds.
auto *Var = cast<VarDecl>(Cap.getVariable());
Expr *CopyExpr = nullptr;
if (getLangOpts().CPlusPlus && Cap.isCopyCapture()) {
if (const RecordType *Record =
Cap.getCaptureType()->getAs<RecordType>()) {
// The capture logic needs the destructor, so make sure we mark it.
// Usually this is unnecessary because most local variables have
// their destructors marked at declaration time, but parameters are
// an exception because it's technically only the call site that
// actually requires the destructor.
if (isa<ParmVarDecl>(Var))
FinalizeVarWithDestructor(Var, Record);
// Enter a separate potentially-evaluated context while building block
// initializers to isolate their cleanups from those of the block
// itself.
// FIXME: Is this appropriate even when the block itself occurs in an
// unevaluated operand?
EnterExpressionEvaluationContext EvalContext(
*this, ExpressionEvaluationContext::PotentiallyEvaluated);
SourceLocation Loc = Cap.getLocation();
ExprResult Result = BuildDeclarationNameExpr(
CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
// According to the blocks spec, the capture of a variable from
// the stack requires a const copy constructor. This is not true
// of the copy/move done to move a __block variable to the heap.
if (!Result.isInvalid() &&
!Result.get()->getType().isConstQualified()) {
Result = ImpCastExprToType(Result.get(),
Result.get()->getType().withConst(),
CK_NoOp, VK_LValue);
}
if (!Result.isInvalid()) {
Result = PerformCopyInitialization(
InitializedEntity::InitializeBlock(Var->getLocation(),
Cap.getCaptureType()),
Loc, Result.get());
}
// Build a full-expression copy expression if initialization
// succeeded and used a non-trivial constructor. Recover from
// errors by pretending that the copy isn't necessary.
if (!Result.isInvalid() &&
!cast<CXXConstructExpr>(Result.get())->getConstructor()
->isTrivial()) {
Result = MaybeCreateExprWithCleanups(Result);
CopyExpr = Result.get();
}
}
}
BlockDecl::Capture NewCap(Var, Cap.isBlockCapture(), Cap.isNested(),
CopyExpr);
Captures.push_back(NewCap);
}
BD->setCaptures(Context, Captures, BSI->CXXThisCaptureIndex != 0);
// Pop the block scope now but keep it alive to the end of this function.
AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy();
PoppedFunctionScopePtr ScopeRAII = PopFunctionScopeInfo(&WP, BD, BlockTy);
BlockExpr *Result = new (Context) BlockExpr(BD, BlockTy);
// If the block isn't obviously global, i.e. it captures anything at
// all, then we need to do a few things in the surrounding context:
if (Result->getBlockDecl()->hasCaptures()) {
// First, this expression has a new cleanup object.
ExprCleanupObjects.push_back(Result->getBlockDecl());
Cleanup.setExprNeedsCleanups(true);
// It also gets a branch-protected scope if any of the captured
// variables needs destruction.
for (const auto &CI : Result->getBlockDecl()->captures()) {
const VarDecl *var = CI.getVariable();
if (var->getType().isDestructedType() != QualType::DK_none) {
setFunctionHasBranchProtectedScope();
break;
}
}
}
if (getCurFunction())
getCurFunction()->addBlock(BD);
if (BD->isInvalidDecl())
return CreateRecoveryExpr(Result->getBeginLoc(), Result->getEndLoc(),
{Result}, Result->getType());
return Result;
}
ExprResult Sema::ActOnVAArg(SourceLocation BuiltinLoc, Expr *E, ParsedType Ty,
SourceLocation RPLoc) {
TypeSourceInfo *TInfo;
GetTypeFromParser(Ty, &TInfo);
return BuildVAArgExpr(BuiltinLoc, E, TInfo, RPLoc);
}
ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc,
Expr *E, TypeSourceInfo *TInfo,
SourceLocation RPLoc) {
Expr *OrigExpr = E;
bool IsMS = false;
// CUDA device code does not support varargs.
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
if (const FunctionDecl *F = dyn_cast<FunctionDecl>(CurContext)) {
CUDAFunctionTarget T = CUDA().IdentifyTarget(F);
if (T == CUDAFunctionTarget::Global || T == CUDAFunctionTarget::Device ||
T == CUDAFunctionTarget::HostDevice)
return ExprError(Diag(E->getBeginLoc(), diag::err_va_arg_in_device));
}
}
// NVPTX does not support va_arg expression.
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsTargetDevice &&
Context.getTargetInfo().getTriple().isNVPTX())
targetDiag(E->getBeginLoc(), diag::err_va_arg_in_device);
// It might be a __builtin_ms_va_list. (But don't ever mark a va_arg()
// as Microsoft ABI on an actual Microsoft platform, where
// __builtin_ms_va_list and __builtin_va_list are the same.)
if (!E->isTypeDependent() && Context.getTargetInfo().hasBuiltinMSVaList() &&
Context.getTargetInfo().getBuiltinVaListKind() != TargetInfo::CharPtrBuiltinVaList) {
QualType MSVaListType = Context.getBuiltinMSVaListType();
if (Context.hasSameType(MSVaListType, E->getType())) {
if (CheckForModifiableLvalue(E, BuiltinLoc, *this))
return ExprError();
IsMS = true;
}
}
// Get the va_list type.
QualType VaListType = Context.getBuiltinVaListType();
if (!IsMS) {
if (VaListType->isArrayType()) {
// Deal with implicit array decay; for example, on x86-64,
// va_list is an array, but it's supposed to decay to
// a pointer for va_arg.
VaListType = Context.getArrayDecayedType(VaListType);
// Make sure the input expression also decays appropriately.
ExprResult Result = UsualUnaryConversions(E);
if (Result.isInvalid())
return ExprError();
E = Result.get();
} else if (VaListType->isRecordType() && getLangOpts().CPlusPlus) {
// If va_list is a record type and we are compiling in C++ mode,
// check the argument using reference binding.
InitializedEntity Entity = InitializedEntity::InitializeParameter(
Context, Context.getLValueReferenceType(VaListType), false);
ExprResult Init = PerformCopyInitialization(Entity, SourceLocation(), E);
if (Init.isInvalid())
return ExprError();
E = Init.getAs<Expr>();
} else {
// Otherwise, the va_list argument must be an l-value because
// it is modified by va_arg.
if (!E->isTypeDependent() &&
CheckForModifiableLvalue(E, BuiltinLoc, *this))
return ExprError();
}
}
if (!IsMS && !E->isTypeDependent() &&
!Context.hasSameType(VaListType, E->getType()))
return ExprError(
Diag(E->getBeginLoc(),
diag::err_first_argument_to_va_arg_not_of_type_va_list)
<< OrigExpr->getType() << E->getSourceRange());
if (!TInfo->getType()->isDependentType()) {
if (RequireCompleteType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(),
diag::err_second_parameter_to_va_arg_incomplete,
TInfo->getTypeLoc()))
return ExprError();
if (RequireNonAbstractType(TInfo->getTypeLoc().getBeginLoc(),
TInfo->getType(),
diag::err_second_parameter_to_va_arg_abstract,
TInfo->getTypeLoc()))
return ExprError();
if (!TInfo->getType().isPODType(Context)) {
Diag(TInfo->getTypeLoc().getBeginLoc(),
TInfo->getType()->isObjCLifetimeType()
? diag::warn_second_parameter_to_va_arg_ownership_qualified
: diag::warn_second_parameter_to_va_arg_not_pod)
<< TInfo->getType()
<< TInfo->getTypeLoc().getSourceRange();
}
// Check for va_arg where arguments of the given type will be promoted
// (i.e. this va_arg is guaranteed to have undefined behavior).
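// For example, "va_arg(ap, short)" is diagnosed because a short argument
// is always promoted to int at the call site, and "va_arg(ap, float)" is
// diagnosed because a float argument is promoted to double.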
QualType PromoteType;
if (Context.isPromotableIntegerType(TInfo->getType())) {
PromoteType = Context.getPromotedIntegerType(TInfo->getType());
// [cstdarg.syn]p1 defers the C++ behavior to what the C standard says,
// and C23 7.16.1.1p2 says, in part:
// If type is not compatible with the type of the actual next argument
// (as promoted according to the default argument promotions), the
// behavior is undefined, except for the following cases:
// - both types are pointers to qualified or unqualified versions of
// compatible types;
// - one type is compatible with a signed integer type, the other
// type is compatible with the corresponding unsigned integer type,
// and the value is representable in both types;
// - one type is pointer to qualified or unqualified void and the
// other is a pointer to a qualified or unqualified character type;
// - or, the type of the next argument is nullptr_t and type is a
// pointer type that has the same representation and alignment
// requirements as a pointer to a character type.
// Given that type compatibility is the primary requirement (ignoring
// qualifications), you would think we could call typesAreCompatible()
// directly to test this. However, in C++, that checks for *same type*,
// which causes false positives when passing an enumeration type to
// va_arg. Instead, get the underlying type of the enumeration and pass
// that.
QualType UnderlyingType = TInfo->getType();
if (const auto *ET = UnderlyingType->getAs<EnumType>())
UnderlyingType = ET->getDecl()->getIntegerType();
if (Context.typesAreCompatible(PromoteType, UnderlyingType,
/*CompareUnqualified*/ true))
PromoteType = QualType();
// If the types are still not compatible, we need to test whether the
// promoted type and the underlying type are the same except for
// signedness. Ask the AST for the correctly corresponding type and see
// if that's compatible.
if (!PromoteType.isNull() && !UnderlyingType->isBooleanType() &&
PromoteType->isUnsignedIntegerType() !=
UnderlyingType->isUnsignedIntegerType()) {
UnderlyingType =
UnderlyingType->isUnsignedIntegerType()
? Context.getCorrespondingSignedType(UnderlyingType)
: Context.getCorrespondingUnsignedType(UnderlyingType);
if (Context.typesAreCompatible(PromoteType, UnderlyingType,
/*CompareUnqualified*/ true))
PromoteType = QualType();
}
}
if (TInfo->getType()->isSpecificBuiltinType(BuiltinType::Float))
PromoteType = Context.DoubleTy;
if (!PromoteType.isNull())
DiagRuntimeBehavior(TInfo->getTypeLoc().getBeginLoc(), E,
PDiag(diag::warn_second_parameter_to_va_arg_never_compatible)
<< TInfo->getType()
<< PromoteType
<< TInfo->getTypeLoc().getSourceRange());
}
QualType T = TInfo->getType().getNonLValueExprType(Context);
return new (Context) VAArgExpr(BuiltinLoc, E, TInfo, RPLoc, T, IsMS);
}
ExprResult Sema::ActOnGNUNullExpr(SourceLocation TokenLoc) {
// The type of __null will be int, long, or long long, depending on the
// size of pointers on the target.
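// For example, on an LP64 target 'long' matches the 64-bit pointer width,
// so __null has type 'long'; on an LLP64 target it is 'long long'.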
QualType Ty;
unsigned pw = Context.getTargetInfo().getPointerWidth(LangAS::Default);
if (pw == Context.getTargetInfo().getIntWidth())
Ty = Context.IntTy;
else if (pw == Context.getTargetInfo().getLongWidth())
Ty = Context.LongTy;
else if (pw == Context.getTargetInfo().getLongLongWidth())
Ty = Context.LongLongTy;
else {
llvm_unreachable("I don't know the size of a pointer!");
}
return new (Context) GNUNullExpr(Ty, TokenLoc);
}
static CXXRecordDecl *LookupStdSourceLocationImpl(Sema &S, SourceLocation Loc) {
CXXRecordDecl *ImplDecl = nullptr;
// Fetch the std::source_location::__impl decl.
if (NamespaceDecl *Std = S.getStdNamespace()) {
LookupResult ResultSL(S, &S.PP.getIdentifierTable().get("source_location"),
Loc, Sema::LookupOrdinaryName);
if (S.LookupQualifiedName(ResultSL, Std)) {
if (auto *SLDecl = ResultSL.getAsSingle<RecordDecl>()) {
LookupResult ResultImpl(S, &S.PP.getIdentifierTable().get("__impl"),
Loc, Sema::LookupOrdinaryName);
if ((SLDecl->isCompleteDefinition() || SLDecl->isBeingDefined()) &&
S.LookupQualifiedName(ResultImpl, SLDecl)) {
ImplDecl = ResultImpl.getAsSingle<CXXRecordDecl>();
}
}
}
}
if (!ImplDecl || !ImplDecl->isCompleteDefinition()) {
S.Diag(Loc, diag::err_std_source_location_impl_not_found);
return nullptr;
}
// Verify that __impl is a trivial struct type, with no base classes, and with
// only the four expected fields.
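// That is, it should look roughly like:
//   struct __impl {
//     const char *_M_file_name;
//     const char *_M_function_name;
//     /* some integer type */ _M_line;
//     /* some integer type */ _M_column;
//   };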
if (ImplDecl->isUnion() || !ImplDecl->isStandardLayout() ||
ImplDecl->getNumBases() != 0) {
S.Diag(Loc, diag::err_std_source_location_impl_malformed);
return nullptr;
}
unsigned Count = 0;
for (FieldDecl *F : ImplDecl->fields()) {
StringRef Name = F->getName();
if (Name == "_M_file_name") {
if (F->getType() !=
S.Context.getPointerType(S.Context.CharTy.withConst()))
break;
Count++;
} else if (Name == "_M_function_name") {
if (F->getType() !=
S.Context.getPointerType(S.Context.CharTy.withConst()))
break;
Count++;
} else if (Name == "_M_line") {
if (!F->getType()->isIntegerType())
break;
Count++;
} else if (Name == "_M_column") {
if (!F->getType()->isIntegerType())
break;
Count++;
} else {
Count = 100; // invalid
break;
}
}
if (Count != 4) {
S.Diag(Loc, diag::err_std_source_location_impl_malformed);
return nullptr;
}
return ImplDecl;
}
ExprResult Sema::ActOnSourceLocExpr(SourceLocIdentKind Kind,
SourceLocation BuiltinLoc,
SourceLocation RPLoc) {
QualType ResultTy;
switch (Kind) {
case SourceLocIdentKind::File:
case SourceLocIdentKind::FileName:
case SourceLocIdentKind::Function:
case SourceLocIdentKind::FuncSig: {
QualType ArrTy = Context.getStringLiteralArrayType(Context.CharTy, 0);
ResultTy =
Context.getPointerType(ArrTy->getAsArrayTypeUnsafe()->getElementType());
break;
}
case SourceLocIdentKind::Line:
case SourceLocIdentKind::Column:
ResultTy = Context.UnsignedIntTy;
break;
case SourceLocIdentKind::SourceLocStruct:
if (!StdSourceLocationImplDecl) {
StdSourceLocationImplDecl =
LookupStdSourceLocationImpl(*this, BuiltinLoc);
if (!StdSourceLocationImplDecl)
return ExprError();
}
ResultTy = Context.getPointerType(
Context.getRecordType(StdSourceLocationImplDecl).withConst());
break;
}
return BuildSourceLocExpr(Kind, ResultTy, BuiltinLoc, RPLoc, CurContext);
}
ExprResult Sema::BuildSourceLocExpr(SourceLocIdentKind Kind, QualType ResultTy,
SourceLocation BuiltinLoc,
SourceLocation RPLoc,
DeclContext *ParentContext) {
return new (Context)
SourceLocExpr(Context, Kind, ResultTy, BuiltinLoc, RPLoc, ParentContext);
}
ExprResult Sema::ActOnEmbedExpr(SourceLocation EmbedKeywordLoc,
StringLiteral *BinaryData) {
EmbedDataStorage *Data = new (Context) EmbedDataStorage;
Data->BinaryData = BinaryData;
return new (Context)
EmbedExpr(Context, EmbedKeywordLoc, Data, /*NumOfElements=*/0,
Data->getDataElementCount());
}
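/// If the source expression names a function being converted to a function
/// pointer type but whose address cannot be taken (for example, because its
/// enable_if constraints are not satisfied), emit a diagnostic and return
/// true.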
static bool maybeDiagnoseAssignmentToFunction(Sema &S, QualType DstType,
const Expr *SrcExpr) {
if (!DstType->isFunctionPointerType() ||
!SrcExpr->getType()->isFunctionType())
return false;
auto *DRE = dyn_cast<DeclRefExpr>(SrcExpr->IgnoreParenImpCasts());
if (!DRE)
return false;
auto *FD = dyn_cast<FunctionDecl>(DRE->getDecl());
if (!FD)
return false;
return !S.checkAddressOfFunctionIsAvailable(FD,
/*Complain=*/true,
SrcExpr->getBeginLoc());
}
bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy,
SourceLocation Loc,
QualType DstType, QualType SrcType,
Expr *SrcExpr, AssignmentAction Action,
bool *Complained) {
if (Complained)
*Complained = false;
// Decode the result (notice that AST's are still created for extensions).
bool CheckInferredResultType = false;
bool isInvalid = false;
unsigned DiagKind = 0;
ConversionFixItGenerator ConvHints;
bool MayHaveConvFixit = false;
bool MayHaveFunctionDiff = false;
const ObjCInterfaceDecl *IFace = nullptr;
const ObjCProtocolDecl *PDecl = nullptr;
switch (ConvTy) {
case Compatible:
DiagnoseAssignmentEnum(DstType, SrcType, SrcExpr);
return false;
case PointerToInt:
if (getLangOpts().CPlusPlus) {
DiagKind = diag::err_typecheck_convert_pointer_int;
isInvalid = true;
} else {
DiagKind = diag::ext_typecheck_convert_pointer_int;
}
ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this);
MayHaveConvFixit = true;
break;
case IntToPointer:
if (getLangOpts().CPlusPlus) {
DiagKind = diag::err_typecheck_convert_int_pointer;
isInvalid = true;
} else {
DiagKind = diag::ext_typecheck_convert_int_pointer;
}
ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this);
MayHaveConvFixit = true;
break;
case IncompatibleFunctionPointerStrict:
DiagKind =
diag::warn_typecheck_convert_incompatible_function_pointer_strict;
ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this);
MayHaveConvFixit = true;
break;
case IncompatibleFunctionPointer:
if (getLangOpts().CPlusPlus) {
DiagKind = diag::err_typecheck_convert_incompatible_function_pointer;
isInvalid = true;
} else {
DiagKind = diag::ext_typecheck_convert_incompatible_function_pointer;
}
ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this);
MayHaveConvFixit = true;
break;
case IncompatiblePointer:
if (Action == AA_Passing_CFAudited) {
DiagKind = diag::err_arc_typecheck_convert_incompatible_pointer;
} else if (getLangOpts().CPlusPlus) {
DiagKind = diag::err_typecheck_convert_incompatible_pointer;
isInvalid = true;
} else {
DiagKind = diag::ext_typecheck_convert_incompatible_pointer;
}
CheckInferredResultType = DstType->isObjCObjectPointerType() &&
SrcType->isObjCObjectPointerType();
if (CheckInferredResultType) {
SrcType = SrcType.getUnqualifiedType();
DstType = DstType.getUnqualifiedType();
} else {
ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this);
}
MayHaveConvFixit = true;
break;
case IncompatiblePointerSign:
if (getLangOpts().CPlusPlus) {
DiagKind = diag::err_typecheck_convert_incompatible_pointer_sign;
isInvalid = true;
} else {
DiagKind = diag::ext_typecheck_convert_incompatible_pointer_sign;
}
break;
case FunctionVoidPointer:
if (getLangOpts().CPlusPlus) {
DiagKind = diag::err_typecheck_convert_pointer_void_func;
isInvalid = true;
} else {
DiagKind = diag::ext_typecheck_convert_pointer_void_func;
}
break;
case IncompatiblePointerDiscardsQualifiers: {
// Perform array-to-pointer decay if necessary.
if (SrcType->isArrayType()) SrcType = Context.getArrayDecayedType(SrcType);
isInvalid = true;
Qualifiers lhq = SrcType->getPointeeType().getQualifiers();
Qualifiers rhq = DstType->getPointeeType().getQualifiers();
if (lhq.getAddressSpace() != rhq.getAddressSpace()) {
DiagKind = diag::err_typecheck_incompatible_address_space;
break;
} else if (lhq.getObjCLifetime() != rhq.getObjCLifetime()) {
DiagKind = diag::err_typecheck_incompatible_ownership;
break;
}
llvm_unreachable("unknown error case for discarding qualifiers!");
// fallthrough
}
case CompatiblePointerDiscardsQualifiers:
// If the qualifiers lost were because we were applying the
// (deprecated) C++ conversion from a string literal to a char*
// (or wchar_t*), then there was no error (C++ 4.2p2). FIXME:
// Ideally, this check would be performed in
// checkPointerTypesForAssignment. However, that would require a
// bit of refactoring (so that the second argument is an
// expression, rather than a type), which should be done as part
// of a larger effort to fix checkPointerTypesForAssignment for
// C++ semantics.
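// For example, the deprecated conversion accepts
//   char *p = "hello";  // deprecated in C++03, removed in C++11 (often
//                       // still accepted by compilers as an extension)
// without a discarded-qualifiers diagnostic.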
if (getLangOpts().CPlusPlus &&
IsStringLiteralToNonConstPointerConversion(SrcExpr, DstType))
return false;
if (getLangOpts().CPlusPlus) {
DiagKind = diag::err_typecheck_convert_discards_qualifiers;
isInvalid = true;
} else {
DiagKind = diag::ext_typecheck_convert_discards_qualifiers;
}
break;
case IncompatibleNestedPointerQualifiers:
if (getLangOpts().CPlusPlus) {
isInvalid = true;
DiagKind = diag::err_nested_pointer_qualifier_mismatch;
} else {
DiagKind = diag::ext_nested_pointer_qualifier_mismatch;
}
break;
case IncompatibleNestedPointerAddressSpaceMismatch:
DiagKind = diag::err_typecheck_incompatible_nested_address_space;
isInvalid = true;
break;
case IntToBlockPointer:
DiagKind = diag::err_int_to_block_pointer;
isInvalid = true;
break;
case IncompatibleBlockPointer:
DiagKind = diag::err_typecheck_convert_incompatible_block_pointer;
isInvalid = true;
break;
case IncompatibleObjCQualifiedId: {
if (SrcType->isObjCQualifiedIdType()) {
const ObjCObjectPointerType *srcOPT =
SrcType->castAs<ObjCObjectPointerType>();
for (auto *srcProto : srcOPT->quals()) {
PDecl = srcProto;
break;
}
if (const ObjCInterfaceType *IFaceT =
DstType->castAs<ObjCObjectPointerType>()->getInterfaceType())
IFace = IFaceT->getDecl();
}
else if (DstType->isObjCQualifiedIdType()) {
const ObjCObjectPointerType *dstOPT =
DstType->castAs<ObjCObjectPointerType>();
for (auto *dstProto : dstOPT->quals()) {
PDecl = dstProto;
break;
}
if (const ObjCInterfaceType *IFaceT =
SrcType->castAs<ObjCObjectPointerType>()->getInterfaceType())
IFace = IFaceT->getDecl();
}
if (getLangOpts().CPlusPlus) {
DiagKind = diag::err_incompatible_qualified_id;
isInvalid = true;
} else {
DiagKind = diag::warn_incompatible_qualified_id;
}
break;
}
case IncompatibleVectors:
if (getLangOpts().CPlusPlus) {
DiagKind = diag::err_incompatible_vectors;
isInvalid = true;
} else {
DiagKind = diag::warn_incompatible_vectors;
}
break;
case IncompatibleObjCWeakRef:
DiagKind = diag::err_arc_weak_unavailable_assign;
isInvalid = true;
break;
case Incompatible:
if (maybeDiagnoseAssignmentToFunction(*this, DstType, SrcExpr)) {
if (Complained)
*Complained = true;
return true;
}
DiagKind = diag::err_typecheck_convert_incompatible;
ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this);
MayHaveConvFixit = true;
isInvalid = true;
MayHaveFunctionDiff = true;
break;
}
QualType FirstType, SecondType;
switch (Action) {
case AA_Assigning:
case AA_Initializing:
// The destination type comes first.
FirstType = DstType;
SecondType = SrcType;
break;
case AA_Returning:
case AA_Passing:
case AA_Passing_CFAudited:
case AA_Converting:
case AA_Sending:
case AA_Casting:
// The source type comes first.
FirstType = SrcType;
SecondType = DstType;
break;
}
PartialDiagnostic FDiag = PDiag(DiagKind);
AssignmentAction ActionForDiag = Action;
if (Action == AA_Passing_CFAudited)
ActionForDiag = AA_Passing;
FDiag << FirstType << SecondType << ActionForDiag
<< SrcExpr->getSourceRange();
if (DiagKind == diag::ext_typecheck_convert_incompatible_pointer_sign ||
DiagKind == diag::err_typecheck_convert_incompatible_pointer_sign) {
auto isPlainChar = [](const clang::Type *Type) {
return Type->isSpecificBuiltinType(BuiltinType::Char_S) ||
Type->isSpecificBuiltinType(BuiltinType::Char_U);
};
FDiag << (isPlainChar(FirstType->getPointeeOrArrayElementType()) ||
isPlainChar(SecondType->getPointeeOrArrayElementType()));
}
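// For instance (illustrative), in C a sign mismatch such as
//   unsigned char *up;
//   char *cp = up;   // -Wpointer-sign
// takes this path; the extra bit selects the "plain char" wording when one
// side's element type is plain 'char'.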
// If we can fix the conversion, suggest the FixIts.
if (!ConvHints.isNull()) {
for (FixItHint &H : ConvHints.Hints)
FDiag << H;
}
if (MayHaveConvFixit) { FDiag << (unsigned) (ConvHints.Kind); }
if (MayHaveFunctionDiff)
HandleFunctionTypeMismatch(FDiag, SecondType, FirstType);
Diag(Loc, FDiag);
if ((DiagKind == diag::warn_incompatible_qualified_id ||
DiagKind == diag::err_incompatible_qualified_id) &&
PDecl && IFace && !IFace->hasDefinition())
Diag(IFace->getLocation(), diag::note_incomplete_class_and_qualified_id)
<< IFace << PDecl;
if (SecondType == Context.OverloadTy)
NoteAllOverloadCandidates(OverloadExpr::find(SrcExpr).Expression,
FirstType, /*TakingAddress=*/true);
if (CheckInferredResultType)
ObjC().EmitRelatedResultTypeNote(SrcExpr);
if (Action == AA_Returning && ConvTy == IncompatiblePointer)
ObjC().EmitRelatedResultTypeNoteForReturn(DstType);
if (Complained)
*Complained = true;
return isInvalid;
}
ExprResult Sema::VerifyIntegerConstantExpression(Expr *E,
llvm::APSInt *Result,
AllowFoldKind CanFold) {
class SimpleICEDiagnoser : public VerifyICEDiagnoser {
public:
SemaDiagnosticBuilder diagnoseNotICEType(Sema &S, SourceLocation Loc,
QualType T) override {
return S.Diag(Loc, diag::err_ice_not_integral)
<< T << S.LangOpts.CPlusPlus;
}
SemaDiagnosticBuilder diagnoseNotICE(Sema &S, SourceLocation Loc) override {
return S.Diag(Loc, diag::err_expr_not_ice) << S.LangOpts.CPlusPlus;
}
} Diagnoser;
return VerifyIntegerConstantExpression(E, Result, Diagnoser, CanFold);
}
ExprResult Sema::VerifyIntegerConstantExpression(Expr *E,
llvm::APSInt *Result,
unsigned DiagID,
AllowFoldKind CanFold) {
class IDDiagnoser : public VerifyICEDiagnoser {
unsigned DiagID;
public:
IDDiagnoser(unsigned DiagID)
: VerifyICEDiagnoser(DiagID == 0), DiagID(DiagID) { }
SemaDiagnosticBuilder diagnoseNotICE(Sema &S, SourceLocation Loc) override {
return S.Diag(Loc, DiagID);
}
} Diagnoser(DiagID);
return VerifyIntegerConstantExpression(E, Result, Diagnoser, CanFold);
}
Sema::SemaDiagnosticBuilder
Sema::VerifyICEDiagnoser::diagnoseNotICEType(Sema &S, SourceLocation Loc,
QualType T) {
return diagnoseNotICE(S, Loc);
}
Sema::SemaDiagnosticBuilder
Sema::VerifyICEDiagnoser::diagnoseFold(Sema &S, SourceLocation Loc) {
return S.Diag(Loc, diag::ext_expr_not_ice) << S.LangOpts.CPlusPlus;
}
ExprResult
Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result,
VerifyICEDiagnoser &Diagnoser,
AllowFoldKind CanFold) {
SourceLocation DiagLoc = E->getBeginLoc();
if (getLangOpts().CPlusPlus11) {
// C++11 [expr.const]p5:
// If an expression of literal class type is used in a context where an
// integral constant expression is required, then that class type shall
// have a single non-explicit conversion function to an integral or
// unscoped enumeration type
ExprResult Converted;
class CXX11ConvertDiagnoser : public ICEConvertDiagnoser {
VerifyICEDiagnoser &BaseDiagnoser;
public:
CXX11ConvertDiagnoser(VerifyICEDiagnoser &BaseDiagnoser)
: ICEConvertDiagnoser(/*AllowScopedEnumerations*/ false,
BaseDiagnoser.Suppress, true),
BaseDiagnoser(BaseDiagnoser) {}
SemaDiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc,
QualType T) override {
return BaseDiagnoser.diagnoseNotICEType(S, Loc, T);
}
SemaDiagnosticBuilder diagnoseIncomplete(
Sema &S, SourceLocation Loc, QualType T) override {
return S.Diag(Loc, diag::err_ice_incomplete_type) << T;
}
SemaDiagnosticBuilder diagnoseExplicitConv(
Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override {
return S.Diag(Loc, diag::err_ice_explicit_conversion) << T << ConvTy;
}
SemaDiagnosticBuilder noteExplicitConv(
Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override {
return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here)
<< ConvTy->isEnumeralType() << ConvTy;
}
SemaDiagnosticBuilder diagnoseAmbiguous(
Sema &S, SourceLocation Loc, QualType T) override {
return S.Diag(Loc, diag::err_ice_ambiguous_conversion) << T;
}
SemaDiagnosticBuilder noteAmbiguous(
Sema &S, CXXConversionDecl *Conv, QualType ConvTy) override {
return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here)
<< ConvTy->isEnumeralType() << ConvTy;
}
SemaDiagnosticBuilder diagnoseConversion(
Sema &S, SourceLocation Loc, QualType T, QualType ConvTy) override {
llvm_unreachable("conversion functions are permitted");
}
} ConvertDiagnoser(Diagnoser);
Converted = PerformContextualImplicitConversion(DiagLoc, E,
ConvertDiagnoser);
if (Converted.isInvalid())
return Converted;
E = Converted.get();
// The 'explicit' case causes us to get a RecoveryExpr. Give up here so we
// don't try to evaluate it later. We also don't want to return the
// RecoveryExpr here, as it results in this call succeeding, thus callers of
// this function will attempt to use 'Value'.
if (isa<RecoveryExpr>(E))
return ExprError();
if (!E->getType()->isIntegralOrUnscopedEnumerationType())
return ExprError();
} else if (!E->getType()->isIntegralOrUnscopedEnumerationType()) {
// An ICE must be of integral or unscoped enumeration type.
if (!Diagnoser.Suppress)
Diagnoser.diagnoseNotICEType(*this, DiagLoc, E->getType())
<< E->getSourceRange();
return ExprError();
}
ExprResult RValueExpr = DefaultLvalueConversion(E);
if (RValueExpr.isInvalid())
return ExprError();
E = RValueExpr.get();
// Circumvent ICE checking in C++11 to avoid evaluating the expression twice
// in the non-ICE case.
if (!getLangOpts().CPlusPlus11 && E->isIntegerConstantExpr(Context)) {
SmallVector<PartialDiagnosticAt, 8> Notes;
if (Result)
*Result = E->EvaluateKnownConstIntCheckOverflow(Context, &Notes);
if (!isa<ConstantExpr>(E))
E = Result ? ConstantExpr::Create(Context, E, APValue(*Result))
: ConstantExpr::Create(Context, E);
if (Notes.empty())
return E;
// If our only note is the usual "invalid subexpression" note, just point
// the caret at its location rather than producing an essentially
// redundant note.
if (Notes.size() == 1 && Notes[0].second.getDiagID() ==
diag::note_invalid_subexpr_in_const_expr) {
DiagLoc = Notes[0].first;
Notes.clear();
}
if (getLangOpts().CPlusPlus) {
if (!Diagnoser.Suppress) {
Diagnoser.diagnoseNotICE(*this, DiagLoc) << E->getSourceRange();
for (const PartialDiagnosticAt &Note : Notes)
Diag(Note.first, Note.second);
}
return ExprError();
}
Diagnoser.diagnoseFold(*this, DiagLoc) << E->getSourceRange();
for (const PartialDiagnosticAt &Note : Notes)
Diag(Note.first, Note.second);
return E;
}
Expr::EvalResult EvalResult;
SmallVector<PartialDiagnosticAt, 8> Notes;
EvalResult.Diag = &Notes;
// Try to evaluate the expression and, as a side-effect, produce diagnostics
// explaining why it's not a constant expression.
bool Folded =
E->EvaluateAsRValue(EvalResult, Context, /*isConstantContext*/ true) &&
EvalResult.Val.isInt() && !EvalResult.HasSideEffects &&
(!getLangOpts().CPlusPlus || !EvalResult.HasUndefinedBehavior);
if (!isa<ConstantExpr>(E))
E = ConstantExpr::Create(Context, E, EvalResult.Val);
// In C++11, we can rely on diagnostics being produced for any expression
// which is not a constant expression. If no diagnostics were produced, then
// this is a constant expression.
if (Folded && getLangOpts().CPlusPlus11 && Notes.empty()) {
if (Result)
*Result = EvalResult.Val.getInt();
return E;
}
// If our only note is the usual "invalid subexpression" note, just point
// the caret at its location rather than producing an essentially
// redundant note.
if (Notes.size() == 1 && Notes[0].second.getDiagID() ==
diag::note_invalid_subexpr_in_const_expr) {
DiagLoc = Notes[0].first;
Notes.clear();
}
if (!Folded || !CanFold) {
if (!Diagnoser.Suppress) {
Diagnoser.diagnoseNotICE(*this, DiagLoc) << E->getSourceRange();
for (const PartialDiagnosticAt &Note : Notes)
Diag(Note.first, Note.second);
}
return ExprError();
}
Diagnoser.diagnoseFold(*this, DiagLoc) << E->getSourceRange();
for (const PartialDiagnosticAt &Note : Notes)
Diag(Note.first, Note.second);
if (Result)
*Result = EvalResult.Val.getInt();
return E;
}
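// Illustrative example (not part of the original source): in C,
//   enum { N = (1, 2) };  // comma operator is not allowed in an ICE
// is not a strict ICE but folds to a constant; with a caller that allows
// folding (AllowFoldKind), diagnoseFold may emit ext_expr_not_ice here
// instead of rejecting the expression outright.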
namespace {
// Handle the case where we conclude that an expression which we
// speculatively considered to be unevaluated is actually evaluated.
class TransformToPE : public TreeTransform<TransformToPE> {
typedef TreeTransform<TransformToPE> BaseTransform;
public:
TransformToPE(Sema &SemaRef) : BaseTransform(SemaRef) { }
// Make sure we redo semantic analysis
bool AlwaysRebuild() { return true; }
bool ReplacingOriginal() { return true; }
// We need to special-case DeclRefExprs referring to FieldDecls which
// are not part of a member pointer formation; normal TreeTransforming
// doesn't catch this case because of the way we represent them in the AST.
// FIXME: This is a bit ugly; is it really the best way to handle this
// case?
//
// Error on DeclRefExprs referring to FieldDecls.
ExprResult TransformDeclRefExpr(DeclRefExpr *E) {
if (isa<FieldDecl>(E->getDecl()) &&
!SemaRef.isUnevaluatedContext())
return SemaRef.Diag(E->getLocation(),
diag::err_invalid_non_static_member_use)
<< E->getDecl() << E->getSourceRange();
return BaseTransform::TransformDeclRefExpr(E);
}
// Exception: filter out member pointer formation
ExprResult TransformUnaryOperator(UnaryOperator *E) {
if (E->getOpcode() == UO_AddrOf && E->getType()->isMemberPointerType())
return E;
return BaseTransform::TransformUnaryOperator(E);
}
// The body of a lambda-expression is in a separate expression evaluation
// context so never needs to be transformed.
// FIXME: Ideally we wouldn't transform the closure type either, and would
// just recreate the capture expressions and lambda expression.
StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body) {
return SkipLambdaBody(E, Body);
}
};
}
ExprResult Sema::TransformToPotentiallyEvaluated(Expr *E) {
assert(isUnevaluatedContext() &&
"Should only transform unevaluated expressions");
ExprEvalContexts.back().Context =
ExprEvalContexts[ExprEvalContexts.size()-2].Context;
if (isUnevaluatedContext())
return E;
return TransformToPE(*this).TransformExpr(E);
}
TypeSourceInfo *Sema::TransformToPotentiallyEvaluated(TypeSourceInfo *TInfo) {
assert(isUnevaluatedContext() &&
"Should only transform unevaluated expressions");
ExprEvalContexts.back().Context = parentEvaluationContext().Context;
if (isUnevaluatedContext())
return TInfo;
return TransformToPE(*this).TransformType(TInfo);
}
void
Sema::PushExpressionEvaluationContext(
ExpressionEvaluationContext NewContext, Decl *LambdaContextDecl,
ExpressionEvaluationContextRecord::ExpressionKind ExprContext) {
ExprEvalContexts.emplace_back(NewContext, ExprCleanupObjects.size(), Cleanup,
LambdaContextDecl, ExprContext);
// Discarded statements and immediate contexts nested in other
// discarded statements or immediate contexts are themselves
// discarded statements or immediate contexts, respectively.
ExprEvalContexts.back().InDiscardedStatement =
parentEvaluationContext().isDiscardedStatementContext();
// C++23 [expr.const]/p15
// An expression or conversion is in an immediate function context if [...]
// it is a subexpression of a manifestly constant-evaluated expression or
// conversion.
const auto &Prev = parentEvaluationContext();
ExprEvalContexts.back().InImmediateFunctionContext =
Prev.isImmediateFunctionContext() || Prev.isConstantEvaluated();
ExprEvalContexts.back().InImmediateEscalatingFunctionContext =
Prev.InImmediateEscalatingFunctionContext;
Cleanup.reset();
if (!MaybeODRUseExprs.empty())
std::swap(MaybeODRUseExprs, ExprEvalContexts.back().SavedMaybeODRUseExprs);
}
void
Sema::PushExpressionEvaluationContext(
ExpressionEvaluationContext NewContext, ReuseLambdaContextDecl_t,
ExpressionEvaluationContextRecord::ExpressionKind ExprContext) {
Decl *ClosureContextDecl = ExprEvalContexts.back().ManglingContextDecl;
PushExpressionEvaluationContext(NewContext, ClosureContextDecl, ExprContext);
}
namespace {
const DeclRefExpr *CheckPossibleDeref(Sema &S, const Expr *PossibleDeref) {
PossibleDeref = PossibleDeref->IgnoreParenImpCasts();
if (const auto *E = dyn_cast<UnaryOperator>(PossibleDeref)) {
if (E->getOpcode() == UO_Deref)
return CheckPossibleDeref(S, E->getSubExpr());
} else if (const auto *E = dyn_cast<ArraySubscriptExpr>(PossibleDeref)) {
return CheckPossibleDeref(S, E->getBase());
} else if (const auto *E = dyn_cast<MemberExpr>(PossibleDeref)) {
return CheckPossibleDeref(S, E->getBase());
} else if (const auto E = dyn_cast<DeclRefExpr>(PossibleDeref)) {
QualType Inner;
QualType Ty = E->getType();
if (const auto *Ptr = Ty->getAs<PointerType>())
Inner = Ptr->getPointeeType();
else if (const auto *Arr = S.Context.getAsArrayType(Ty))
Inner = Arr->getElementType();
else
return nullptr;
if (Inner->hasAttr(attr::NoDeref))
return E;
}
return nullptr;
}
} // namespace
void Sema::WarnOnPendingNoDerefs(ExpressionEvaluationContextRecord &Rec) {
for (const Expr *E : Rec.PossibleDerefs) {
const DeclRefExpr *DeclRef = CheckPossibleDeref(*this, E);
if (DeclRef) {
const ValueDecl *Decl = DeclRef->getDecl();
Diag(E->getExprLoc(), diag::warn_dereference_of_noderef_type)
<< Decl->getName() << E->getSourceRange();
Diag(Decl->getLocation(), diag::note_previous_decl) << Decl->getName();
} else {
Diag(E->getExprLoc(), diag::warn_dereference_of_noderef_type_no_decl)
<< E->getSourceRange();
}
}
Rec.PossibleDerefs.clear();
}
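// Illustrative example (not part of the original source):
//   int __attribute__((noderef)) *p;
//   int x = *p;   // warn_dereference_of_noderef_type, naming 'p'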
void Sema::CheckUnusedVolatileAssignment(Expr *E) {
if (!E->getType().isVolatileQualified() || !getLangOpts().CPlusPlus20)
return;
// Note: ignoring parens here is not justified by the standard rules, but
// ignoring parentheses seems like a more reasonable approach, and this only
// drives a deprecation warning so doesn't affect conformance.
if (auto *BO = dyn_cast<BinaryOperator>(E->IgnoreParenImpCasts())) {
if (BO->getOpcode() == BO_Assign) {
auto &LHSs = ExprEvalContexts.back().VolatileAssignmentLHSs;
llvm::erase(LHSs, BO->getLHS());
}
}
}
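// Illustrative example (not part of the original source), C++20:
//   volatile int v;
//   v = 42;            // discarded value: removed from the list, no warning
//   int i = (v = 42);  // result is used: warn_deprecated_simple_assign_volatile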
void Sema::MarkExpressionAsImmediateEscalating(Expr *E) {
assert(getLangOpts().CPlusPlus20 &&
ExprEvalContexts.back().InImmediateEscalatingFunctionContext &&
"Cannot mark an immediate escalating expression outside of an "
"immediate escalating context");
if (auto *Call = dyn_cast<CallExpr>(E->IgnoreImplicit());
Call && Call->getCallee()) {
if (auto *DeclRef =
dyn_cast<DeclRefExpr>(Call->getCallee()->IgnoreImplicit()))
DeclRef->setIsImmediateEscalating(true);
} else if (auto *Ctr = dyn_cast<CXXConstructExpr>(E->IgnoreImplicit())) {
Ctr->setIsImmediateEscalating(true);
} else if (auto *DeclRef = dyn_cast<DeclRefExpr>(E->IgnoreImplicit())) {
DeclRef->setIsImmediateEscalating(true);
} else {
assert(false && "expected an immediately escalating expression");
}
if (FunctionScopeInfo *FI = getCurFunction())
FI->FoundImmediateEscalatingExpression = true;
}
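// Illustrative example (not part of the original source), C++23:
//   consteval int id(int x) { return x; }
//   template <class T>
//   constexpr int f(T t) { return id(t); }  // 'id(t)' is immediate-escalating,
//                                           // so f<T> becomes an immediate
//                                           // function when instantiated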
ExprResult Sema::CheckForImmediateInvocation(ExprResult E, FunctionDecl *Decl) {
if (isUnevaluatedContext() || !E.isUsable() || !Decl ||
!Decl->isImmediateFunction() || isAlwaysConstantEvaluatedContext() ||
isCheckingDefaultArgumentOrInitializer() ||
RebuildingImmediateInvocation || isImmediateFunctionContext())
return E;
/// Opportunistically remove the callee from ReferenceToConsteval if we can.
/// It's OK if this fails; we'll also remove this in
/// HandleImmediateInvocations, but catching it here allows us to avoid
/// walking the AST looking for it in simple cases.
if (auto *Call = dyn_cast<CallExpr>(E.get()->IgnoreImplicit()))
if (auto *DeclRef =
dyn_cast<DeclRefExpr>(Call->getCallee()->IgnoreImplicit()))
ExprEvalContexts.back().ReferenceToConsteval.erase(DeclRef);
// C++23 [expr.const]/p16
// An expression or conversion is immediate-escalating if it is not initially
// in an immediate function context and it is [...] an immediate invocation
// that is not a constant expression and is not a subexpression of an
// immediate invocation.
APValue Cached;
auto CheckConstantExpressionAndKeepResult = [&]() {
llvm::SmallVector<PartialDiagnosticAt, 8> Notes;
Expr::EvalResult Eval;
Eval.Diag = &Notes;
bool Res = E.get()->EvaluateAsConstantExpr(
Eval, getASTContext(), ConstantExprKind::ImmediateInvocation);
if (Res && Notes.empty()) {
Cached = std::move(Eval.Val);
return true;
}
return false;
};
if (!E.get()->isValueDependent() &&
ExprEvalContexts.back().InImmediateEscalatingFunctionContext &&
!CheckConstantExpressionAndKeepResult()) {
MarkExpressionAsImmediateEscalating(E.get());
return E;
}
if (Cleanup.exprNeedsCleanups()) {
// Since an immediate invocation is a full-expression itself, it requires an
// additional ExprWithCleanups node. However, it can be part of a bigger
// full-expression which actually requires cleanups to be run after it, so
// create the ExprWithCleanups without using MaybeCreateExprWithCleanups, as
// that may discard cleanups for the outer expression too early.
// Note that ExprWithCleanups created here must always have empty cleanup
// objects:
// - compound literals do not create cleanup objects in C++ and immediate
// invocations are C++-only.
// - blocks are not allowed inside constant expressions and the compiler
// will issue an error if they appear there.
//
// Hence, in correct code any cleanup objects created inside the current
// evaluation context must be outside the immediate invocation.
E = ExprWithCleanups::Create(getASTContext(), E.get(),
Cleanup.cleanupsHaveSideEffects(), {});
}
ConstantExpr *Res = ConstantExpr::Create(
getASTContext(), E.get(),
ConstantExpr::getStorageKind(Decl->getReturnType().getTypePtr(),
getASTContext()),
/*IsImmediateInvocation*/ true);
if (Cached.hasValue())
Res->MoveIntoResult(Cached, getASTContext());
/// Value-dependent constant expressions should not be immediately
/// evaluated until they are instantiated.
if (!Res->isValueDependent())
ExprEvalContexts.back().ImmediateInvocationCandidates.emplace_back(Res, 0);
return Res;
}
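// Illustrative example (not part of the original source):
//   consteval int sq(int n) { return n * n; }
//   int a = sq(2);  // immediate invocation; the cached result 4 is kept
//   int b = sq(a);  // not a constant expression; diagnosed when the
//                   // candidate is evaluated below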
static void EvaluateAndDiagnoseImmediateInvocation(
Sema &SemaRef, Sema::ImmediateInvocationCandidate Candidate) {
llvm::SmallVector<PartialDiagnosticAt, 8> Notes;
Expr::EvalResult Eval;
Eval.Diag = &Notes;
ConstantExpr *CE = Candidate.getPointer();
bool Result = CE->EvaluateAsConstantExpr(
Eval, SemaRef.getASTContext(), ConstantExprKind::ImmediateInvocation);
if (!Result || !Notes.empty()) {
SemaRef.FailedImmediateInvocations.insert(CE);
Expr *InnerExpr = CE->getSubExpr()->IgnoreImplicit();
if (auto *FunctionalCast = dyn_cast<CXXFunctionalCastExpr>(InnerExpr))
InnerExpr = FunctionalCast->getSubExpr()->IgnoreImplicit();
FunctionDecl *FD = nullptr;
if (auto *Call = dyn_cast<CallExpr>(InnerExpr))
FD = cast<FunctionDecl>(Call->getCalleeDecl());
else if (auto *Call = dyn_cast<CXXConstructExpr>(InnerExpr))
FD = Call->getConstructor();
else if (auto *Cast = dyn_cast<CastExpr>(InnerExpr))
FD = dyn_cast_or_null<FunctionDecl>(Cast->getConversionFunction());
assert(FD && FD->isImmediateFunction() &&
"could not find an immediate function in this expression");
if (FD->isInvalidDecl())
return;
SemaRef.Diag(CE->getBeginLoc(), diag::err_invalid_consteval_call)
<< FD << FD->isConsteval();
if (auto Context =
SemaRef.InnermostDeclarationWithDelayedImmediateInvocations()) {
SemaRef.Diag(Context->Loc, diag::note_invalid_consteval_initializer)
<< Context->Decl;
SemaRef.Diag(Context->Decl->getBeginLoc(), diag::note_declared_at);
}
if (!FD->isConsteval())
SemaRef.DiagnoseImmediateEscalatingReason(FD);
for (auto &Note : Notes)
SemaRef.Diag(Note.first, Note.second);
return;
}
CE->MoveIntoResult(Eval.Val, SemaRef.getASTContext());
}
static void RemoveNestedImmediateInvocation(
Sema &SemaRef, Sema::ExpressionEvaluationContextRecord &Rec,
SmallVector<Sema::ImmediateInvocationCandidate, 4>::reverse_iterator It) {
struct ComplexRemove : TreeTransform<ComplexRemove> {
using Base = TreeTransform<ComplexRemove>;
llvm::SmallPtrSetImpl<DeclRefExpr *> &DRSet;
SmallVector<Sema::ImmediateInvocationCandidate, 4> &IISet;
SmallVector<Sema::ImmediateInvocationCandidate, 4>::reverse_iterator
CurrentII;
ComplexRemove(Sema &SemaRef, llvm::SmallPtrSetImpl<DeclRefExpr *> &DR,
SmallVector<Sema::ImmediateInvocationCandidate, 4> &II,
SmallVector<Sema::ImmediateInvocationCandidate,
4>::reverse_iterator Current)
: Base(SemaRef), DRSet(DR), IISet(II), CurrentII(Current) {}
void RemoveImmediateInvocation(ConstantExpr* E) {
auto It = std::find_if(CurrentII, IISet.rend(),
[E](Sema::ImmediateInvocationCandidate Elem) {
return Elem.getPointer() == E;
});
// It is possible that some subexpression of the current immediate
// invocation was handled from another expression evaluation context. Do
// not handle the current immediate invocation if some of its
// subexpressions failed before.
if (It == IISet.rend()) {
if (SemaRef.FailedImmediateInvocations.contains(E))
CurrentII->setInt(1);
} else {
It->setInt(1); // Mark as deleted
}
}
ExprResult TransformConstantExpr(ConstantExpr *E) {
if (!E->isImmediateInvocation())
return Base::TransformConstantExpr(E);
RemoveImmediateInvocation(E);
return Base::TransformExpr(E->getSubExpr());
}
/// Base::TransformCXXOperatorCallExpr doesn't traverse the callee, so
/// we need to remove its DeclRefExpr from the DRSet.
ExprResult TransformCXXOperatorCallExpr(CXXOperatorCallExpr *E) {
DRSet.erase(cast<DeclRefExpr>(E->getCallee()->IgnoreImplicit()));
return Base::TransformCXXOperatorCallExpr(E);
}
/// Base::TransformUserDefinedLiteral doesn't preserve the
/// UserDefinedLiteral node.
ExprResult TransformUserDefinedLiteral(UserDefinedLiteral *E) { return E; }
/// Base::TransformInitializer skips ConstantExpr so we need to visit them
/// here.
ExprResult TransformInitializer(Expr *Init, bool NotCopyInit) {
if (!Init)
return Init;
/// ConstantExprs are the first layer of implicit nodes to be removed, so if
/// Init isn't a ConstantExpr, no ConstantExpr will be skipped.
if (auto *CE = dyn_cast<ConstantExpr>(Init))
if (CE->isImmediateInvocation())
RemoveImmediateInvocation(CE);
return Base::TransformInitializer(Init, NotCopyInit);
}
ExprResult TransformDeclRefExpr(DeclRefExpr *E) {
DRSet.erase(E);
return E;
}
ExprResult TransformLambdaExpr(LambdaExpr *E) {
// Do not rebuild lambdas to avoid creating a new type.
// Lambdas have already been processed inside their eval context.
return E;
}
bool AlwaysRebuild() { return false; }
bool ReplacingOriginal() { return true; }
bool AllowSkippingCXXConstructExpr() {
bool Res = AllowSkippingFirstCXXConstructExpr;
AllowSkippingFirstCXXConstructExpr = true;
return Res;
}
bool AllowSkippingFirstCXXConstructExpr = true;
} Transformer(SemaRef, Rec.ReferenceToConsteval,
Rec.ImmediateInvocationCandidates, It);
/// A CXXConstructExpr with a single argument is sometimes skipped by
/// TreeTransform because it could be implicit. This can only occur for the
/// top-level CXXConstructExpr, because it is used nowhere in the expression
/// being transformed and therefore will not be rebuilt. Setting
/// AllowSkippingFirstCXXConstructExpr to false prevents the first
/// CXXConstructExpr from being skipped.
if (isa<CXXConstructExpr>(It->getPointer()->IgnoreImplicit()))
Transformer.AllowSkippingFirstCXXConstructExpr = false;
ExprResult Res = Transformer.TransformExpr(It->getPointer()->getSubExpr());
// The result may not be usable in case of previous compilation errors.
// In this case evaluation of the expression may result in a crash, so just
// don't do anything further with the result.
if (Res.isUsable()) {
Res = SemaRef.MaybeCreateExprWithCleanups(Res);
It->getPointer()->setSubExpr(Res.get());
}
}
static void
HandleImmediateInvocations(Sema &SemaRef,
Sema::ExpressionEvaluationContextRecord &Rec) {
if ((Rec.ImmediateInvocationCandidates.size() == 0 &&
Rec.ReferenceToConsteval.size() == 0) ||
Rec.isImmediateFunctionContext() || SemaRef.RebuildingImmediateInvocation)
return;
/// When we have more than one ImmediateInvocationCandidate or previously
/// failed immediate invocations, we need to check for nested
/// ImmediateInvocationCandidates in order to avoid duplicate diagnostics.
/// Otherwise we only need to remove the ReferenceToConsteval entries in the
/// immediate invocation.
if (Rec.ImmediateInvocationCandidates.size() > 1 ||
!SemaRef.FailedImmediateInvocations.empty()) {
/// Prevent sema calls during the tree transform from adding pointers that
/// are already in the sets.
llvm::SaveAndRestore DisableIITracking(
SemaRef.RebuildingImmediateInvocation, true);
/// Prevent diagnostics during the tree transform, as they would be duplicates.
Sema::TentativeAnalysisScope DisableDiag(SemaRef);
for (auto It = Rec.ImmediateInvocationCandidates.rbegin();
It != Rec.ImmediateInvocationCandidates.rend(); It++)
if (!It->getInt())
RemoveNestedImmediateInvocation(SemaRef, Rec, It);
} else if (Rec.ImmediateInvocationCandidates.size() == 1 &&
Rec.ReferenceToConsteval.size()) {
struct SimpleRemove : RecursiveASTVisitor<SimpleRemove> {
llvm::SmallPtrSetImpl<DeclRefExpr *> &DRSet;
SimpleRemove(llvm::SmallPtrSetImpl<DeclRefExpr *> &S) : DRSet(S) {}
bool VisitDeclRefExpr(DeclRefExpr *E) {
DRSet.erase(E);
return DRSet.size();
}
} Visitor(Rec.ReferenceToConsteval);
Visitor.TraverseStmt(
Rec.ImmediateInvocationCandidates.front().getPointer()->getSubExpr());
}
for (auto CE : Rec.ImmediateInvocationCandidates)
if (!CE.getInt())
EvaluateAndDiagnoseImmediateInvocation(SemaRef, CE);
for (auto *DR : Rec.ReferenceToConsteval) {
// If the expression is immediate escalating, it is not an error;
// the outer context itself becomes immediate, and further errors,
// if any, will be handled by DiagnoseImmediateEscalatingReason.
if (DR->isImmediateEscalating())
continue;
auto *FD = cast<FunctionDecl>(DR->getDecl());
const NamedDecl *ND = FD;
if (const auto *MD = dyn_cast<CXXMethodDecl>(ND);
MD && (MD->isLambdaStaticInvoker() || isLambdaCallOperator(MD)))
ND = MD->getParent();
// C++23 [expr.const]/p16
// An expression or conversion is immediate-escalating if it is not
// initially in an immediate function context and it is [...] a
// potentially-evaluated id-expression that denotes an immediate function
// that is not a subexpression of an immediate invocation.
bool ImmediateEscalating = false;
bool IsPotentiallyEvaluated =
Rec.Context ==
Sema::ExpressionEvaluationContext::PotentiallyEvaluated ||
Rec.Context ==
Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed;
if (SemaRef.inTemplateInstantiation() && IsPotentiallyEvaluated)
ImmediateEscalating = Rec.InImmediateEscalatingFunctionContext;
if (!Rec.InImmediateEscalatingFunctionContext ||
(SemaRef.inTemplateInstantiation() && !ImmediateEscalating)) {
SemaRef.Diag(DR->getBeginLoc(), diag::err_invalid_consteval_take_address)
<< ND << isa<CXXRecordDecl>(ND) << FD->isConsteval();
SemaRef.Diag(ND->getLocation(), diag::note_declared_at);
if (auto Context =
SemaRef.InnermostDeclarationWithDelayedImmediateInvocations()) {
SemaRef.Diag(Context->Loc, diag::note_invalid_consteval_initializer)
<< Context->Decl;
SemaRef.Diag(Context->Decl->getBeginLoc(), diag::note_declared_at);
}
if (FD->isImmediateEscalating() && !FD->isConsteval())
SemaRef.DiagnoseImmediateEscalatingReason(FD);
} else {
SemaRef.MarkExpressionAsImmediateEscalating(DR);
}
}
}
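// Illustrative example (not part of the original source):
//   consteval int f() { return 0; }
//   auto *p = &f;  // err_invalid_consteval_take_address: the address of a
//                  // consteval function escapes an immediate invocation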
void Sema::PopExpressionEvaluationContext() {
ExpressionEvaluationContextRecord& Rec = ExprEvalContexts.back();
unsigned NumTypos = Rec.NumTypos;
if (!Rec.Lambdas.empty()) {
using ExpressionKind = ExpressionEvaluationContextRecord::ExpressionKind;
if (!getLangOpts().CPlusPlus20 &&
(Rec.ExprContext == ExpressionKind::EK_TemplateArgument ||
Rec.isUnevaluated() ||
(Rec.isConstantEvaluated() && !getLangOpts().CPlusPlus17))) {
unsigned D;
if (Rec.isUnevaluated()) {
// C++11 [expr.prim.lambda]p2:
// A lambda-expression shall not appear in an unevaluated operand
// (Clause 5).
D = diag::err_lambda_unevaluated_operand;
} else if (Rec.isConstantEvaluated() && !getLangOpts().CPlusPlus17) {
// C++1y [expr.const]p2:
// A conditional-expression e is a core constant expression unless the
// evaluation of e, following the rules of the abstract machine, would
// evaluate [...] a lambda-expression.
D = diag::err_lambda_in_constant_expression;
} else if (Rec.ExprContext == ExpressionKind::EK_TemplateArgument) {
// C++17 [expr.prim.lambda]p2:
// A lambda-expression shall not appear [...] in a template-argument.
D = diag::err_lambda_in_invalid_context;
} else
llvm_unreachable("Couldn't infer lambda error message.");
for (const auto *L : Rec.Lambdas)
Diag(L->getBeginLoc(), D);
}
}
// Append the collected materialized temporaries to the previous context
// before exiting, if the previous context is also a lifetime-extending one.
auto &PrevRecord = parentEvaluationContext();
if (getLangOpts().CPlusPlus23 && Rec.InLifetimeExtendingContext &&
PrevRecord.InLifetimeExtendingContext &&
!Rec.ForRangeLifetimeExtendTemps.empty()) {
PrevRecord.ForRangeLifetimeExtendTemps.append(
Rec.ForRangeLifetimeExtendTemps);
}
WarnOnPendingNoDerefs(Rec);
HandleImmediateInvocations(*this, Rec);
// Warn on any volatile-qualified simple-assignments that are neither
// discarded-value expressions nor unevaluated operands (those cases get
// removed from this list by CheckUnusedVolatileAssignment).
for (auto *BO : Rec.VolatileAssignmentLHSs)
Diag(BO->getBeginLoc(), diag::warn_deprecated_simple_assign_volatile)
<< BO->getType();
// When we are coming out of an unevaluated context, clear out any
// temporaries that we may have created as part of the evaluation of
// the expression in that context: they aren't relevant because they
// will never be constructed.
if (Rec.isUnevaluated() || Rec.isConstantEvaluated()) {
ExprCleanupObjects.erase(ExprCleanupObjects.begin() + Rec.NumCleanupObjects,
ExprCleanupObjects.end());
Cleanup = Rec.ParentCleanup;
CleanupVarDeclMarking();
std::swap(MaybeODRUseExprs, Rec.SavedMaybeODRUseExprs);
// Otherwise, merge the contexts together.
} else {
Cleanup.mergeFrom(Rec.ParentCleanup);
MaybeODRUseExprs.insert(Rec.SavedMaybeODRUseExprs.begin(),
Rec.SavedMaybeODRUseExprs.end());
}
// Pop the current expression evaluation context off the stack.
ExprEvalContexts.pop_back();
// The global expression evaluation context record is never popped.
ExprEvalContexts.back().NumTypos += NumTypos;
}
void Sema::DiscardCleanupsInEvaluationContext() {
ExprCleanupObjects.erase(
ExprCleanupObjects.begin() + ExprEvalContexts.back().NumCleanupObjects,
ExprCleanupObjects.end());
Cleanup.reset();
MaybeODRUseExprs.clear();
}
ExprResult Sema::HandleExprEvaluationContextForTypeof(Expr *E) {
ExprResult Result = CheckPlaceholderExpr(E);
if (Result.isInvalid())
return ExprError();
E = Result.get();
if (!E->getType()->isVariablyModifiedType())
return E;
return TransformToPotentiallyEvaluated(E);
}
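// Illustrative example (not part of the original source), C:
//   int n = 4; int vla[n];
//   typeof(vla) copy;  // 'vla' has a variably modified type, so the typeof
//                      // operand must be treated as potentially evaluated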
/// Are we in a context that is potentially constant evaluated per C++20
/// [expr.const]p12?
static bool isPotentiallyConstantEvaluatedContext(Sema &SemaRef) {
/// C++2a [expr.const]p12:
// An expression or conversion is potentially constant evaluated if it is
switch (SemaRef.ExprEvalContexts.back().Context) {
case Sema::ExpressionEvaluationContext::ConstantEvaluated:
case Sema::ExpressionEvaluationContext::ImmediateFunctionContext:
// -- a manifestly constant-evaluated expression,
case Sema::ExpressionEvaluationContext::PotentiallyEvaluated:
case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
case Sema::ExpressionEvaluationContext::DiscardedStatement:
// -- a potentially-evaluated expression,
case Sema::ExpressionEvaluationContext::UnevaluatedList:
// -- an immediate subexpression of a braced-init-list,
// -- [FIXME] an expression of the form & cast-expression that occurs
// within a templated entity
// -- a subexpression of one of the above that is not a subexpression of
// a nested unevaluated operand.
return true;
case Sema::ExpressionEvaluationContext::Unevaluated:
case Sema::ExpressionEvaluationContext::UnevaluatedAbstract:
// Expressions in this context are never evaluated.
return false;
}
llvm_unreachable("Invalid context");
}
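// For instance (illustrative): a constexpr function named in an array bound
// (a manifestly constant-evaluated context) is needed for constant
// evaluation, while one named only inside a 'sizeof' operand (Unevaluated)
// is not.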
/// Return true if this function has a calling convention that requires mangling
/// in the size of the parameter pack.
static bool funcHasParameterSizeMangling(Sema &S, FunctionDecl *FD) {
// These manglings don't do anything on non-Windows or non-x86 platforms, so
// we don't need parameter type sizes.
const llvm::Triple &TT = S.Context.getTargetInfo().getTriple();
if (!TT.isOSWindows() || !TT.isX86())
return false;
// If this is C++ and this isn't an extern "C" function, parameters do not
// need to be complete. In this case, C++ mangling will apply, which doesn't
// use the size of the parameters.
if (S.getLangOpts().CPlusPlus && !FD->isExternC())
return false;
// Stdcall, fastcall, and vectorcall need this special treatment.
CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv();
switch (CC) {
case CC_X86StdCall:
case CC_X86FastCall:
case CC_X86VectorCall:
return true;
default:
break;
}
return false;
}
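// Illustrative example (not part of the original source): on 32-bit Windows,
//   void __stdcall f(int, int);
// mangles as '_f@8', so the byte size of the parameter list must be known.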
/// Require that all of the parameter types of a function be complete. Normally,
/// parameter types are only required to be complete when a function is called
/// or defined, but to mangle functions with certain calling conventions, the
/// mangler needs to know the size of the parameter list. In this situation,
/// MSVC doesn't emit an error or instantiate templates. Instead, MSVC mangles
/// the function as _foo@0, i.e. zero bytes of parameters, which will usually
/// result in a linker error. Clang doesn't implement this behavior, and instead
/// attempts to error at compile time.
static void CheckCompleteParameterTypesForMangler(Sema &S, FunctionDecl *FD,
SourceLocation Loc) {
class ParamIncompleteTypeDiagnoser : public Sema::TypeDiagnoser {
FunctionDecl *FD;
ParmVarDecl *Param;
public:
ParamIncompleteTypeDiagnoser(FunctionDecl *FD, ParmVarDecl *Param)
: FD(FD), Param(Param) {}
void diagnose(Sema &S, SourceLocation Loc, QualType T) override {
CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv();
StringRef CCName;
switch (CC) {
case CC_X86StdCall:
CCName = "stdcall";
break;
case CC_X86FastCall:
CCName = "fastcall";
break;
case CC_X86VectorCall:
CCName = "vectorcall";
break;
default:
llvm_unreachable("CC does not need mangling");
}
S.Diag(Loc, diag::err_cconv_incomplete_param_type)
<< Param->getDeclName() << FD->getDeclName() << CCName;
}
};
for (ParmVarDecl *Param : FD->parameters()) {
ParamIncompleteTypeDiagnoser Diagnoser(FD, Param);
S.RequireCompleteType(Loc, Param->getType(), Diagnoser);
}
}
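// Illustrative example (not part of the original source):
//   struct S;               // incomplete
//   void __stdcall g(S s);
//   auto *p = &g;           // using 'g' needs sizeof(S) for the '_g@N'
//                           // mangling: err_cconv_incomplete_param_type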
namespace {
enum class OdrUseContext {
/// Declarations in this context are not odr-used.
None,
/// Declarations in this context are formally odr-used, but this is a
/// dependent context.
Dependent,
/// Declarations in this context are odr-used but not actually used (yet).
FormallyOdrUsed,
/// Declarations in this context are used.
Used
};
}
/// Are we within a context in which references to resolved functions or to
/// variables result in odr-use?
static OdrUseContext isOdrUseContext(Sema &SemaRef) {
OdrUseContext Result;
switch (SemaRef.ExprEvalContexts.back().Context) {
case Sema::ExpressionEvaluationContext::Unevaluated:
case Sema::ExpressionEvaluationContext::UnevaluatedList:
case Sema::ExpressionEvaluationContext::UnevaluatedAbstract:
return OdrUseContext::None;
case Sema::ExpressionEvaluationContext::ConstantEvaluated:
case Sema::ExpressionEvaluationContext::ImmediateFunctionContext:
case Sema::ExpressionEvaluationContext::PotentiallyEvaluated:
Result = OdrUseContext::Used;
break;
case Sema::ExpressionEvaluationContext::DiscardedStatement:
Result = OdrUseContext::FormallyOdrUsed;
break;
case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
// A default argument formally results in odr-use, but doesn't actually
// result in a use in any real sense until it itself is used.
Result = OdrUseContext::FormallyOdrUsed;
break;
}
if (SemaRef.CurContext->isDependentContext())
return OdrUseContext::Dependent;
return Result;
}
static bool isImplicitlyDefinableConstexprFunction(FunctionDecl *Func) {
if (!Func->isConstexpr())
return false;
if (Func->isImplicitlyInstantiable() || !Func->isUserProvided())
return true;
auto *CCD = dyn_cast<CXXConstructorDecl>(Func);
return CCD && CCD->getInheritedConstructor();
}
void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
bool MightBeOdrUse) {
assert(Func && "No function?");
Func->setReferenced();
// Recursive functions aren't really used until they're used from some other
// context.
bool IsRecursiveCall = CurContext == Func;
// C++11 [basic.def.odr]p3:
// A function whose name appears as a potentially-evaluated expression is
// odr-used if it is the unique lookup result or the selected member of a
// set of overloaded functions [...].
//
// We (incorrectly) mark overload resolution as an unevaluated context, so we
// can just check that here.
OdrUseContext OdrUse =
MightBeOdrUse ? isOdrUseContext(*this) : OdrUseContext::None;
if (IsRecursiveCall && OdrUse == OdrUseContext::Used)
OdrUse = OdrUseContext::FormallyOdrUsed;
// Trivial default constructors and destructors are never actually used.
// FIXME: What about other special members?
if (Func->isTrivial() && !Func->hasAttr<DLLExportAttr>() &&
OdrUse == OdrUseContext::Used) {
if (auto *Constructor = dyn_cast<CXXConstructorDecl>(Func))
if (Constructor->isDefaultConstructor())
OdrUse = OdrUseContext::FormallyOdrUsed;
if (isa<CXXDestructorDecl>(Func))
OdrUse = OdrUseContext::FormallyOdrUsed;
}
// C++20 [expr.const]p12:
// A function [...] is needed for constant evaluation if it is [...] a
// constexpr function that is named by an expression that is potentially
// constant evaluated
bool NeededForConstantEvaluation =
isPotentiallyConstantEvaluatedContext(*this) &&
isImplicitlyDefinableConstexprFunction(Func);
// Determine whether we require a function definition to exist, per
// C++11 [temp.inst]p3:
// Unless a function template specialization has been explicitly
// instantiated or explicitly specialized, the function template
// specialization is implicitly instantiated when the specialization is
// referenced in a context that requires a function definition to exist.
// C++20 [temp.inst]p7:
// The existence of a definition of a [...] function is considered to
// affect the semantics of the program if the [...] function is needed for
// constant evaluation by an expression
// C++20 [basic.def.odr]p10:
// Every program shall contain exactly one definition of every non-inline
// function or variable that is odr-used in that program outside of a
// discarded statement
// C++20 [special]p1:
// The implementation will implicitly define [defaulted special members]
// if they are odr-used or needed for constant evaluation.
//
// Note that we skip the implicit instantiation of templates that are only
// used in unused default arguments or by recursive calls to themselves.
// This is formally non-conforming, but seems reasonable in practice.
bool NeedDefinition =
!IsRecursiveCall &&
(OdrUse == OdrUseContext::Used ||
(NeededForConstantEvaluation && !Func->isPureVirtual()));
// C++14 [temp.expl.spec]p6:
// If a template [...] is explicitly specialized then that specialization
// shall be declared before the first use of that specialization that would
// cause an implicit instantiation to take place, in every translation unit
// in which such a use occurs
if (NeedDefinition &&
(Func->getTemplateSpecializationKind() != TSK_Undeclared ||
Func->getMemberSpecializationInfo()))
checkSpecializationReachability(Loc, Func);
if (getLangOpts().CUDA)
CUDA().CheckCall(Loc, Func);
// If we need a definition, try to create one.
if (NeedDefinition && !Func->getBody()) {
runWithSufficientStackSpace(Loc, [&] {
if (CXXConstructorDecl *Constructor =
dyn_cast<CXXConstructorDecl>(Func)) {
Constructor = cast<CXXConstructorDecl>(Constructor->getFirstDecl());
if (Constructor->isDefaulted() && !Constructor->isDeleted()) {
if (Constructor->isDefaultConstructor()) {
if (Constructor->isTrivial() &&
!Constructor->hasAttr<DLLExportAttr>())
return;
DefineImplicitDefaultConstructor(Loc, Constructor);
} else if (Constructor->isCopyConstructor()) {
DefineImplicitCopyConstructor(Loc, Constructor);
} else if (Constructor->isMoveConstructor()) {
DefineImplicitMoveConstructor(Loc, Constructor);
}
} else if (Constructor->getInheritedConstructor()) {
DefineInheritingConstructor(Loc, Constructor);
}
} else if (CXXDestructorDecl *Destructor =
dyn_cast<CXXDestructorDecl>(Func)) {
Destructor = cast<CXXDestructorDecl>(Destructor->getFirstDecl());
if (Destructor->isDefaulted() && !Destructor->isDeleted()) {
if (Destructor->isTrivial() && !Destructor->hasAttr<DLLExportAttr>())
return;
DefineImplicitDestructor(Loc, Destructor);
}
if (Destructor->isVirtual() && getLangOpts().AppleKext)
MarkVTableUsed(Loc, Destructor->getParent());
} else if (CXXMethodDecl *MethodDecl = dyn_cast<CXXMethodDecl>(Func)) {
if (MethodDecl->isOverloadedOperator() &&
MethodDecl->getOverloadedOperator() == OO_Equal) {
MethodDecl = cast<CXXMethodDecl>(MethodDecl->getFirstDecl());
if (MethodDecl->isDefaulted() && !MethodDecl->isDeleted()) {
if (MethodDecl->isCopyAssignmentOperator())
DefineImplicitCopyAssignment(Loc, MethodDecl);
else if (MethodDecl->isMoveAssignmentOperator())
DefineImplicitMoveAssignment(Loc, MethodDecl);
}
} else if (isa<CXXConversionDecl>(MethodDecl) &&
MethodDecl->getParent()->isLambda()) {
CXXConversionDecl *Conversion =
cast<CXXConversionDecl>(MethodDecl->getFirstDecl());
if (Conversion->isLambdaToBlockPointerConversion())
DefineImplicitLambdaToBlockPointerConversion(Loc, Conversion);
else
DefineImplicitLambdaToFunctionPointerConversion(Loc, Conversion);
} else if (MethodDecl->isVirtual() && getLangOpts().AppleKext)
MarkVTableUsed(Loc, MethodDecl->getParent());
}
if (Func->isDefaulted() && !Func->isDeleted()) {
DefaultedComparisonKind DCK = getDefaultedComparisonKind(Func);
if (DCK != DefaultedComparisonKind::None)
DefineDefaultedComparison(Loc, Func, DCK);
}
// Implicit instantiation of function templates and member functions of
// class templates.
if (Func->isImplicitlyInstantiable()) {
TemplateSpecializationKind TSK =
Func->getTemplateSpecializationKindForInstantiation();
SourceLocation PointOfInstantiation = Func->getPointOfInstantiation();
bool FirstInstantiation = PointOfInstantiation.isInvalid();
if (FirstInstantiation) {
PointOfInstantiation = Loc;
if (auto *MSI = Func->getMemberSpecializationInfo())
MSI->setPointOfInstantiation(Loc);
// FIXME: Notify listener.
else
Func->setTemplateSpecializationKind(TSK, PointOfInstantiation);
} else if (TSK != TSK_ImplicitInstantiation) {
// Use the point of use as the point of instantiation, instead of the
// point of explicit instantiation (which we track as the actual point
// of instantiation). This gives better backtraces in diagnostics.
PointOfInstantiation = Loc;
}
if (FirstInstantiation || TSK != TSK_ImplicitInstantiation ||
Func->isConstexpr()) {
if (isa<CXXRecordDecl>(Func->getDeclContext()) &&
cast<CXXRecordDecl>(Func->getDeclContext())->isLocalClass() &&
CodeSynthesisContexts.size())
PendingLocalImplicitInstantiations.push_back(
std::make_pair(Func, PointOfInstantiation));
else if (Func->isConstexpr())
// Do not defer instantiations of constexpr functions, to avoid the
// expression evaluator needing to call back into Sema if it sees a
// call to such a function.
InstantiateFunctionDefinition(PointOfInstantiation, Func);
else {
Func->setInstantiationIsPending(true);
PendingInstantiations.push_back(
std::make_pair(Func, PointOfInstantiation));
// Notify the consumer that a function was implicitly instantiated.
Consumer.HandleCXXImplicitFunctionInstantiation(Func);
}
}
} else {
// Walk redeclarations, as some of them may be instantiable.
for (auto *i : Func->redecls()) {
if (!i->isUsed(false) && i->isImplicitlyInstantiable())
MarkFunctionReferenced(Loc, i, MightBeOdrUse);
}
}
});
}
// If a constructor was defined in the context of a default parameter
// or of another default member initializer (i.e., a PotentiallyEvaluatedIfUsed
// context), its initializers may not be referenced yet.
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(Func)) {
EnterExpressionEvaluationContext EvalContext(
*this,
Constructor->isImmediateFunction()
? ExpressionEvaluationContext::ImmediateFunctionContext
: ExpressionEvaluationContext::PotentiallyEvaluated,
Constructor);
for (CXXCtorInitializer *Init : Constructor->inits()) {
if (Init->isInClassMemberInitializer())
runWithSufficientStackSpace(Init->getSourceLocation(), [&]() {
MarkDeclarationsReferencedInExpr(Init->getInit());
});
}
}
// C++14 [except.spec]p17:
// An exception-specification is considered to be needed when:
// - the function is odr-used or, if it appears in an unevaluated operand,
// would be odr-used if the expression were potentially-evaluated;
//
// Note, we do this even if MightBeOdrUse is false. That indicates that the
// function is a pure virtual function we're calling, and in that case the
// function was selected by overload resolution and we need to resolve its
// exception specification for a different reason.
const FunctionProtoType *FPT = Func->getType()->getAs<FunctionProtoType>();
if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType()))
ResolveExceptionSpec(Loc, FPT);
// A callee could be called by a host function and then by a device function.
// If we only try recording once, we will miss recording the use on the
// device side. Therefore keep trying until it is recorded.
if (LangOpts.OffloadImplicitHostDeviceTemplates && LangOpts.CUDAIsDevice &&
!getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Func))
CUDA().RecordImplicitHostDeviceFuncUsedByDevice(Func);
// If this is the first "real" use, act on that.
if (OdrUse == OdrUseContext::Used && !Func->isUsed(/*CheckUsedAttr=*/false)) {
// Keep track of used but undefined functions.
if (!Func->isDefined()) {
if (mightHaveNonExternalLinkage(Func))
UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
else if (Func->getMostRecentDecl()->isInlined() &&
!LangOpts.GNUInline &&
!Func->getMostRecentDecl()->hasAttr<GNUInlineAttr>())
UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
else if (isExternalWithNoLinkageType(Func))
UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
}
// Some x86 Windows calling conventions mangle the size of the parameter
// pack into the name. Computing the size of the parameters requires the
// parameter types to be complete. Check that now.
if (funcHasParameterSizeMangling(*this, Func))
CheckCompleteParameterTypesForMangler(*this, Func, Loc);
// In the MS C++ ABI, the compiler emits destructor variants where they are
// used. If the destructor is used here but defined elsewhere, mark the
// virtual base destructors referenced. If those virtual base destructors
// are inline, this will ensure they are defined when emitting the complete
// destructor variant. This checking may be redundant if the destructor is
// provided later in this TU.
if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
if (auto *Dtor = dyn_cast<CXXDestructorDecl>(Func)) {
CXXRecordDecl *Parent = Dtor->getParent();
if (Parent->getNumVBases() > 0 && !Dtor->getBody())
CheckCompleteDestructorVariant(Loc, Dtor);
}
}
Func->markUsed(Context);
}
}
/// Directly mark a variable odr-used. Given a choice, prefer to use
/// MarkVariableReferenced since it does additional checks and then
/// calls MarkVarDeclODRUsed.
/// If the variable must be captured:
/// - if FunctionScopeIndexToStopAt is null, capture it in the CurContext
/// - else capture it in the DeclContext that maps to the
/// *FunctionScopeIndexToStopAt on the FunctionScopeInfo stack.
static void
MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
const unsigned *const FunctionScopeIndexToStopAt = nullptr) {
// Keep track of used but undefined variables.
// FIXME: We shouldn't suppress this warning for static data members.
VarDecl *Var = V->getPotentiallyDecomposedVarDecl();
assert(Var && "expected a capturable variable");
if (Var->hasDefinition(SemaRef.Context) == VarDecl::DeclarationOnly &&
(!Var->isExternallyVisible() || Var->isInline() ||
SemaRef.isExternalWithNoLinkageType(Var)) &&
!(Var->isStaticDataMember() && Var->hasInit())) {
SourceLocation &old = SemaRef.UndefinedButUsed[Var->getCanonicalDecl()];
if (old.isInvalid())
old = Loc;
}
QualType CaptureType, DeclRefType;
if (SemaRef.LangOpts.OpenMP)
SemaRef.OpenMP().tryCaptureOpenMPLambdas(V);
SemaRef.tryCaptureVariable(V, Loc, Sema::TryCapture_Implicit,
/*EllipsisLoc*/ SourceLocation(),
/*BuildAndDiagnose*/ true, CaptureType,
DeclRefType, FunctionScopeIndexToStopAt);
if (SemaRef.LangOpts.CUDA && Var->hasGlobalStorage()) {
auto *FD = dyn_cast_or_null<FunctionDecl>(SemaRef.CurContext);
auto VarTarget = SemaRef.CUDA().IdentifyTarget(Var);
auto UserTarget = SemaRef.CUDA().IdentifyTarget(FD);
if (VarTarget == SemaCUDA::CVT_Host &&
(UserTarget == CUDAFunctionTarget::Device ||
UserTarget == CUDAFunctionTarget::HostDevice ||
UserTarget == CUDAFunctionTarget::Global)) {
// Diagnose ODR-use of host global variables in device functions.
// Referencing device global variables in host functions is allowed
// through shadow variables, so it is not diagnosed.
if (SemaRef.LangOpts.CUDAIsDevice && !SemaRef.LangOpts.HIPStdPar) {
SemaRef.targetDiag(Loc, diag::err_ref_bad_target)
<< /*host*/ 2 << /*variable*/ 1 << Var
<< llvm::to_underlying(UserTarget);
SemaRef.targetDiag(Var->getLocation(),
Var->getType().isConstQualified()
? diag::note_cuda_const_var_unpromoted
: diag::note_cuda_host_var);
}
} else if (VarTarget == SemaCUDA::CVT_Device &&
!Var->hasAttr<CUDASharedAttr>() &&
(UserTarget == CUDAFunctionTarget::Host ||
UserTarget == CUDAFunctionTarget::HostDevice)) {
// Record a CUDA/HIP device side variable if it is ODR-used
// by host code. This is done conservatively, when the variable is
// referenced in any of the following contexts:
// - a non-function context
// - a host function
// - a host device function
// This makes the ODR-use of the device-side variable by host code
// visible in the device compilation, so that the compiler can emit
// template variables instantiated only by host code and externalize
// static device-side variables ODR-used by host code.
if (!Var->hasExternalStorage())
SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
else if (SemaRef.LangOpts.GPURelocatableDeviceCode &&
(!FD || (!FD->getDescribedFunctionTemplate() &&
SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
GVA_StrongExternal)))
SemaRef.getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Var);
}
}
V->markUsed(SemaRef.Context);
}
void Sema::MarkCaptureUsedInEnclosingContext(ValueDecl *Capture,
SourceLocation Loc,
unsigned CapturingScopeIndex) {
MarkVarDeclODRUsed(Capture, Loc, *this, &CapturingScopeIndex);
}
void diagnoseUncapturableValueReferenceOrBinding(Sema &S, SourceLocation loc,
ValueDecl *var) {
DeclContext *VarDC = var->getDeclContext();
// If the parameter still belongs to the translation unit, then
// we're actually just using one parameter in the declaration of
// the next.
if (isa<ParmVarDecl>(var) &&
isa<TranslationUnitDecl>(VarDC))
return;
// For C code, don't diagnose about capture if we're not actually in code
// right now; it's impossible to write a non-constant expression outside of
// function context, so we'll get other (more useful) diagnostics later.
//
// For C++, things get a bit more nasty... it would be nice to suppress this
// diagnostic for certain cases like using a local variable in an array bound
// for a member of a local class, but the correct predicate is not obvious.
if (!S.getLangOpts().CPlusPlus && !S.CurContext->isFunctionOrMethod())
return;
unsigned ValueKind = isa<BindingDecl>(var) ? 1 : 0;
unsigned ContextKind = 3; // unknown
if (isa<CXXMethodDecl>(VarDC) &&
cast<CXXRecordDecl>(VarDC->getParent())->isLambda()) {
ContextKind = 2;
} else if (isa<FunctionDecl>(VarDC)) {
ContextKind = 0;
} else if (isa<BlockDecl>(VarDC)) {
ContextKind = 1;
}
S.Diag(loc, diag::err_reference_to_local_in_enclosing_context)
<< var << ValueKind << ContextKind << VarDC;
S.Diag(var->getLocation(), diag::note_entity_declared_at)
<< var;
// FIXME: Add additional diagnostic info about class etc. which prevents
// capture.
}
static bool isVariableAlreadyCapturedInScopeInfo(CapturingScopeInfo *CSI,
ValueDecl *Var,
bool &SubCapturesAreNested,
QualType &CaptureType,
QualType &DeclRefType) {
// Check whether we've already captured it.
if (CSI->CaptureMap.count(Var)) {
// If we found a capture, any subcaptures are nested.
SubCapturesAreNested = true;
// Retrieve the capture type for this variable.
CaptureType = CSI->getCapture(Var).getCaptureType();
// Compute the type of an expression that refers to this variable.
DeclRefType = CaptureType.getNonReferenceType();
// Similarly to mutable captures in lambda, all the OpenMP captures by copy
// are mutable in the sense that user can change their value - they are
// private instances of the captured declarations.
const Capture &Cap = CSI->getCapture(Var);
if (Cap.isCopyCapture() &&
!(isa<LambdaScopeInfo>(CSI) &&
!cast<LambdaScopeInfo>(CSI)->lambdaCaptureShouldBeConst()) &&
!(isa<CapturedRegionScopeInfo>(CSI) &&
cast<CapturedRegionScopeInfo>(CSI)->CapRegionKind == CR_OpenMP))
DeclRefType.addConst();
return true;
}
return false;
}
// Only block literals, captured statements, and lambda expressions can
// capture; other scopes don't work.
static DeclContext *getParentOfCapturingContextOrNull(DeclContext *DC,
ValueDecl *Var,
SourceLocation Loc,
const bool Diagnose,
Sema &S) {
if (isa<BlockDecl>(DC) || isa<CapturedDecl>(DC) || isLambdaCallOperator(DC))
return getLambdaAwareParentOfDeclContext(DC);
VarDecl *Underlying = Var->getPotentiallyDecomposedVarDecl();
if (Underlying) {
if (Underlying->hasLocalStorage() && Diagnose)
diagnoseUncapturableValueReferenceOrBinding(S, Loc, Var);
}
return nullptr;
}
// Certain capturing entities (lambdas, blocks etc.) are not allowed to
// capture certain types of variables (unnamed, variably-modified types
// etc.), so check for eligibility.
static bool isVariableCapturable(CapturingScopeInfo *CSI, ValueDecl *Var,
SourceLocation Loc, const bool Diagnose,
Sema &S) {
assert((isa<VarDecl, BindingDecl>(Var)) &&
"Only variables and structured bindings can be captured");
bool IsBlock = isa<BlockScopeInfo>(CSI);
bool IsLambda = isa<LambdaScopeInfo>(CSI);
// Lambdas are not allowed to capture unnamed variables
// (e.g. anonymous unions).
// FIXME: The C++11 rules don't actually state this explicitly, but I'm
// assuming that's the intent.
if (IsLambda && !Var->getDeclName()) {
if (Diagnose) {
S.Diag(Loc, diag::err_lambda_capture_anonymous_var);
S.Diag(Var->getLocation(), diag::note_declared_at);
}
return false;
}
// Prohibit variably-modified types in blocks; they're difficult to deal with.
if (Var->getType()->isVariablyModifiedType() && IsBlock) {
if (Diagnose) {
S.Diag(Loc, diag::err_ref_vm_type);
S.Diag(Var->getLocation(), diag::note_previous_decl) << Var;
}
return false;
}
// Prohibit structs with flexible array members too.
// We cannot capture what is in the tail end of the struct.
if (const RecordType *VTTy = Var->getType()->getAs<RecordType>()) {
if (VTTy->getDecl()->hasFlexibleArrayMember()) {
if (Diagnose) {
if (IsBlock)
S.Diag(Loc, diag::err_ref_flexarray_type);
else
S.Diag(Loc, diag::err_lambda_capture_flexarray_type) << Var;
S.Diag(Var->getLocation(), diag::note_previous_decl) << Var;
}
return false;
}
}
const bool HasBlocksAttr = Var->hasAttr<BlocksAttr>();
// Lambdas and captured statements are not allowed to capture __block
// variables; they don't support the expected semantics.
if (HasBlocksAttr && (IsLambda || isa<CapturedRegionScopeInfo>(CSI))) {
if (Diagnose) {
S.Diag(Loc, diag::err_capture_block_variable) << Var << !IsLambda;
S.Diag(Var->getLocation(), diag::note_previous_decl) << Var;
}
return false;
}
// OpenCL v2.0 s6.12.5: Blocks cannot reference/capture other blocks
if (S.getLangOpts().OpenCL && IsBlock &&
Var->getType()->isBlockPointerType()) {
if (Diagnose)
S.Diag(Loc, diag::err_opencl_block_ref_block);
return false;
}
if (isa<BindingDecl>(Var)) {
if (!IsLambda || !S.getLangOpts().CPlusPlus) {
if (Diagnose)
diagnoseUncapturableValueReferenceOrBinding(S, Loc, Var);
return false;
} else if (Diagnose && S.getLangOpts().CPlusPlus) {
S.Diag(Loc, S.LangOpts.CPlusPlus20
? diag::warn_cxx17_compat_capture_binding
: diag::ext_capture_binding)
<< Var;
S.Diag(Var->getLocation(), diag::note_entity_declared_at) << Var;
}
}
return true;
}
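// Illustrative sketch (hypothetical user code, assumes -fblocks): two of the
// eligibility checks above in action:
//
//   __block int b = 0;
//   auto l = [b] { return b; };  // error: __block variable cannot be
//                                // captured in a lambda expression
//
//   struct Flex { int n; int tail[]; } fam;
//   auto m = [fam] {};           // error: a variable with a flexible array
//                                // member cannot be captured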
// Returns true if the capture by block was successful.
static bool captureInBlock(BlockScopeInfo *BSI, ValueDecl *Var,
SourceLocation Loc, const bool BuildAndDiagnose,
QualType &CaptureType, QualType &DeclRefType,
const bool Nested, Sema &S, bool Invalid) {
bool ByRef = false;
// Blocks are not allowed to capture arrays, except in OpenCL.
// OpenCL v2.0 s1.12.5 (revision 40): arrays are captured by reference
// (decayed to pointers).
if (!Invalid && !S.getLangOpts().OpenCL && CaptureType->isArrayType()) {
if (BuildAndDiagnose) {
S.Diag(Loc, diag::err_ref_array_type);
S.Diag(Var->getLocation(), diag::note_previous_decl) << Var;
Invalid = true;
} else {
return false;
}
}
// Forbid the block-capture of autoreleasing variables.
if (!Invalid &&
CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) {
if (BuildAndDiagnose) {
S.Diag(Loc, diag::err_arc_autoreleasing_capture)
<< /*block*/ 0;
S.Diag(Var->getLocation(), diag::note_previous_decl) << Var;
Invalid = true;
} else {
return false;
}
}
// Warn about implicitly autoreleasing indirect parameters captured by blocks.
if (const auto *PT = CaptureType->getAs<PointerType>()) {
QualType PointeeTy = PT->getPointeeType();
if (!Invalid && PointeeTy->getAs<ObjCObjectPointerType>() &&
PointeeTy.getObjCLifetime() == Qualifiers::OCL_Autoreleasing &&
!S.Context.hasDirectOwnershipQualifier(PointeeTy)) {
if (BuildAndDiagnose) {
SourceLocation VarLoc = Var->getLocation();
S.Diag(Loc, diag::warn_block_capture_autoreleasing);
S.Diag(VarLoc, diag::note_declare_parameter_strong);
}
}
}
const bool HasBlocksAttr = Var->hasAttr<BlocksAttr>();
if (HasBlocksAttr || CaptureType->isReferenceType() ||
(S.getLangOpts().OpenMP && S.OpenMP().isOpenMPCapturedDecl(Var))) {
// Block capture by reference does not change the capture or
// declaration reference types.
ByRef = true;
} else {
// Block capture by copy introduces 'const'.
CaptureType = CaptureType.getNonReferenceType().withConst();
DeclRefType = CaptureType;
}
// Actually capture the variable.
if (BuildAndDiagnose)
BSI->addCapture(Var, HasBlocksAttr, ByRef, Nested, Loc, SourceLocation(),
CaptureType, Invalid);
return !Invalid;
}
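// Illustrative sketch (hypothetical user code, assumes -fblocks, non-OpenCL):
// blocks reject array captures, and by-copy block captures become const:
//
//   int arr[4];
//   ^{ return arr[0]; }();  // error: cannot refer to declaration with an
//                           // array type inside a block
//   int x = 0;
//   ^{ x = 1; }();          // error: 'x' is captured by copy and therefore
//                           // const inside the block (use __block to mutate)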
/// Capture the given variable in the captured region.
static bool captureInCapturedRegion(
CapturedRegionScopeInfo *RSI, ValueDecl *Var, SourceLocation Loc,
const bool BuildAndDiagnose, QualType &CaptureType, QualType &DeclRefType,
const bool RefersToCapturedVariable, Sema::TryCaptureKind Kind,
bool IsTopScope, Sema &S, bool Invalid) {
// By default, capture variables by reference.
bool ByRef = true;
if (IsTopScope && Kind != Sema::TryCapture_Implicit) {
ByRef = (Kind == Sema::TryCapture_ExplicitByRef);
} else if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) {
// Using an LValue reference type is consistent with Lambdas (see below).
if (S.OpenMP().isOpenMPCapturedDecl(Var)) {
bool HasConst = DeclRefType.isConstQualified();
DeclRefType = DeclRefType.getUnqualifiedType();
// Don't lose diagnostics about assignments to const.
if (HasConst)
DeclRefType.addConst();
}
// Do not capture firstprivates in tasks.
if (S.OpenMP().isOpenMPPrivateDecl(Var, RSI->OpenMPLevel,
RSI->OpenMPCaptureLevel) != OMPC_unknown)
return true;
ByRef = S.OpenMP().isOpenMPCapturedByRef(Var, RSI->OpenMPLevel,
RSI->OpenMPCaptureLevel);
}
if (ByRef)
CaptureType = S.Context.getLValueReferenceType(DeclRefType);
else
CaptureType = DeclRefType;
// Actually capture the variable.
if (BuildAndDiagnose)
RSI->addCapture(Var, /*isBlock*/ false, ByRef, RefersToCapturedVariable,
Loc, SourceLocation(), CaptureType, Invalid);
return !Invalid;
}
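// Illustrative sketch (hypothetical OpenMP user code): variables referenced
// in a captured region default to by-reference capture, so the outlined
// function updates the original object:
//
//   int x = 0;
//   #pragma omp parallel
//   { x += 1; }  // 'x' is captured as 'int &' into the captured statement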
/// Capture the given variable in the lambda.
static bool captureInLambda(LambdaScopeInfo *LSI, ValueDecl *Var,
SourceLocation Loc, const bool BuildAndDiagnose,
QualType &CaptureType, QualType &DeclRefType,
const bool RefersToCapturedVariable,
const Sema::TryCaptureKind Kind,
SourceLocation EllipsisLoc, const bool IsTopScope,
Sema &S, bool Invalid) {
// Determine whether we are capturing by reference or by value.
bool ByRef = false;
if (IsTopScope && Kind != Sema::TryCapture_Implicit) {
ByRef = (Kind == Sema::TryCapture_ExplicitByRef);
} else {
ByRef = (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByref);
}
if (BuildAndDiagnose && S.Context.getTargetInfo().getTriple().isWasm() &&
CaptureType.getNonReferenceType().isWebAssemblyReferenceType()) {
S.Diag(Loc, diag::err_wasm_ca_reference) << 0;
Invalid = true;
}
// Compute the type of the field that will capture this variable.
if (ByRef) {
// C++11 [expr.prim.lambda]p15:
// An entity is captured by reference if it is implicitly or
// explicitly captured but not captured by copy. It is
// unspecified whether additional unnamed non-static data
// members are declared in the closure type for entities
// captured by reference.
//
// FIXME: It is not clear whether we want to build an lvalue reference
// to the DeclRefType or to CaptureType.getNonReferenceType(). GCC appears
// to do the former, while EDG does the latter. Core issue 1249 will
// clarify, but for now we follow GCC because it's a more permissive and
// easily defensible position.
CaptureType = S.Context.getLValueReferenceType(DeclRefType);
} else {
// C++11 [expr.prim.lambda]p14:
// For each entity captured by copy, an unnamed non-static
// data member is declared in the closure type. The
// declaration order of these members is unspecified. The type
// of such a data member is the type of the corresponding
// captured entity if the entity is not a reference to an
// object, or the referenced type otherwise. [Note: If the
// captured entity is a reference to a function, the
// corresponding data member is also a reference to a
// function. - end note ]
if (const ReferenceType *RefType = CaptureType->getAs<ReferenceType>()) {
if (!RefType->getPointeeType()->isFunctionType())
CaptureType = RefType->getPointeeType();
}
// Forbid the lambda copy-capture of autoreleasing variables.
if (!Invalid &&
CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) {
if (BuildAndDiagnose) {
S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*lambda*/ 1;
S.Diag(Var->getLocation(), diag::note_previous_decl)
<< Var->getDeclName();
Invalid = true;
} else {
return false;
}
}
// Make sure that by-copy captures are of a complete and non-abstract type.
if (!Invalid && BuildAndDiagnose) {
if (!CaptureType->isDependentType() &&
S.RequireCompleteSizedType(
Loc, CaptureType,
diag::err_capture_of_incomplete_or_sizeless_type,
Var->getDeclName()))
Invalid = true;
else if (S.RequireNonAbstractType(Loc, CaptureType,
diag::err_capture_of_abstract_type))
Invalid = true;
}
}
// Compute the type of a reference to this captured variable.
if (ByRef)
DeclRefType = CaptureType.getNonReferenceType();
else {
// C++ [expr.prim.lambda]p5:
// The closure type for a lambda-expression has a public inline
// function call operator [...]. This function call operator is
// declared const (9.3.1) if and only if the lambda-expression's
// parameter-declaration-clause is not followed by mutable.
DeclRefType = CaptureType.getNonReferenceType();
bool Const = LSI->lambdaCaptureShouldBeConst();
if (Const && !CaptureType->isReferenceType())
DeclRefType.addConst();
}
// Add the capture.
if (BuildAndDiagnose)
LSI->addCapture(Var, /*isBlock=*/false, ByRef, RefersToCapturedVariable,
Loc, EllipsisLoc, CaptureType, Invalid);
return !Invalid;
}
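// Illustrative sketch (hypothetical user code): the field-type computation
// above, per C++11 [expr.prim.lambda]p14-15:
//
//   int i = 0;
//   int &r = i;
//   auto a = [&i] {};  // by-reference capture: unnamed member of type int&
//   auto b = [r] {};   // by-copy capture of a reference to an object:
//                      // the member has the referenced type, i.e. int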
static bool canCaptureVariableByCopy(ValueDecl *Var,
const ASTContext &Context) {
// Offer a Copy fix even if the type is dependent.
if (Var->getType()->isDependentType())
return true;
QualType T = Var->getType().getNonReferenceType();
if (T.isTriviallyCopyableType(Context))
return true;
if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) {
if (!(RD = RD->getDefinition()))
return false;
if (RD->hasSimpleCopyConstructor())
return true;
if (RD->hasUserDeclaredCopyConstructor())
for (CXXConstructorDecl *Ctor : RD->ctors())
if (Ctor->isCopyConstructor())
return !Ctor->isDeleted();
}
return false;
}
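// Illustrative sketch (hypothetical user code): a move-only type fails the
// check above, so only the by-reference capture fix-it is offered below:
//
//   std::unique_ptr<int> p;
//   auto l = [] { return *p; };  // error: 'p' not captured; fix-its suggest
//                                // [&p] but not [p] (copy ctor is deleted)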
/// Create up to 4 fix-its for explicit reference and value capture of \p Var
/// or default capture. Fixes may be omitted if they aren't allowed by the
/// standard, for example we can't emit a default copy capture fix-it if we
/// already explicitly copy-capture another variable.
static void buildLambdaCaptureFixit(Sema &Sema, LambdaScopeInfo *LSI,
ValueDecl *Var) {
assert(LSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None);
// Don't offer capture-by-copy or default capture-by-copy fixes if Var is
// known not to be copy constructible.
bool ShouldOfferCopyFix = canCaptureVariableByCopy(Var, Sema.getASTContext());
SmallString<32> FixBuffer;
StringRef Separator = LSI->NumExplicitCaptures > 0 ? ", " : "";
if (Var->getDeclName().isIdentifier() && !Var->getName().empty()) {
SourceLocation VarInsertLoc = LSI->IntroducerRange.getEnd();
if (ShouldOfferCopyFix) {
// Offer fixes to insert an explicit capture for the variable.
// [] -> [VarName]
// [OtherCapture] -> [OtherCapture, VarName]
FixBuffer.assign({Separator, Var->getName()});
Sema.Diag(VarInsertLoc, diag::note_lambda_variable_capture_fixit)
<< Var << /*value*/ 0
<< FixItHint::CreateInsertion(VarInsertLoc, FixBuffer);
}
// As above but capture by reference.
FixBuffer.assign({Separator, "&", Var->getName()});
Sema.Diag(VarInsertLoc, diag::note_lambda_variable_capture_fixit)
<< Var << /*reference*/ 1
<< FixItHint::CreateInsertion(VarInsertLoc, FixBuffer);
}
// Only try to offer default capture if there are no captures excluding this
// and init captures.
// [this]: OK.
// [X = Y]: OK.
// [&A, &B]: Don't offer.
// [A, B]: Don't offer.
if (llvm::any_of(LSI->Captures, [](Capture &C) {
return !C.isThisCapture() && !C.isInitCapture();
}))
return;
// The default capture specifiers, '=' or '&', must appear first in the
// capture body.
SourceLocation DefaultInsertLoc =
LSI->IntroducerRange.getBegin().getLocWithOffset(1);
if (ShouldOfferCopyFix) {
bool CanDefaultCopyCapture = true;
// [=, *this] OK since c++17
// [=, this] OK since c++20
if (LSI->isCXXThisCaptured() && !Sema.getLangOpts().CPlusPlus20)
CanDefaultCopyCapture = Sema.getLangOpts().CPlusPlus17
? LSI->getCXXThisCapture().isCopyCapture()
: false;
// We can't use default capture by copy if any captures already specified
// capture by copy.
if (CanDefaultCopyCapture && llvm::none_of(LSI->Captures, [](Capture &C) {
return !C.isThisCapture() && !C.isInitCapture() && C.isCopyCapture();
})) {
FixBuffer.assign({"=", Separator});
Sema.Diag(DefaultInsertLoc, diag::note_lambda_default_capture_fixit)
<< /*value*/ 0
<< FixItHint::CreateInsertion(DefaultInsertLoc, FixBuffer);
}
}
// We can't use default capture by reference if any captures already specified
// capture by reference.
if (llvm::none_of(LSI->Captures, [](Capture &C) {
return !C.isInitCapture() && C.isReferenceCapture() &&
!C.isThisCapture();
})) {
FixBuffer.assign({"&", Separator});
Sema.Diag(DefaultInsertLoc, diag::note_lambda_default_capture_fixit)
<< /*reference*/ 1
<< FixItHint::CreateInsertion(DefaultInsertLoc, FixBuffer);
}
}
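// Illustrative sketch (hypothetical user code): for a copyable variable the
// notes above can suggest up to four insertions:
//
//   int n = 0;
//   auto l = [] { return n; };  // error: 'n' cannot be implicitly captured
//                               // fix-its offered: [n], [&n], [=], [&]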
bool Sema::tryCaptureVariable(
ValueDecl *Var, SourceLocation ExprLoc, TryCaptureKind Kind,
SourceLocation EllipsisLoc, bool BuildAndDiagnose, QualType &CaptureType,
QualType &DeclRefType, const unsigned *const FunctionScopeIndexToStopAt) {
// An init-capture is notionally from the context surrounding its
// declaration, but its parent DC is the lambda class.
DeclContext *VarDC = Var->getDeclContext();
DeclContext *DC = CurContext;
// Skip past RequiresExprBodys because they don't constitute function scopes.
while (DC->isRequiresExprBody())
DC = DC->getParent();
// tryCaptureVariable is called every time a DeclRef is formed,
// so it can have a non-negligible impact on performance.
// For local variables and when there is no capturing scope,
// we can bail out early.
if (CapturingFunctionScopes == 0 && (!BuildAndDiagnose || VarDC == DC))
return true;
// Exception: Function parameters are not tied to the function's DeclContext
// until we enter the function definition. Capturing them anyway would result
// in an out-of-bounds error while traversing DC and its parents.
if (isa<ParmVarDecl>(Var) && !VarDC->isFunctionOrMethod())
return true;
const auto *VD = dyn_cast<VarDecl>(Var);
if (VD) {
if (VD->isInitCapture())
VarDC = VarDC->getParent();
} else {
VD = Var->getPotentiallyDecomposedVarDecl();
}
assert(VD && "Cannot capture a null variable");
const unsigned MaxFunctionScopesIndex = FunctionScopeIndexToStopAt
? *FunctionScopeIndexToStopAt : FunctionScopes.size() - 1;
// We need to sync up the Declaration Context with the
// FunctionScopeIndexToStopAt.
if (FunctionScopeIndexToStopAt) {
unsigned FSIndex = FunctionScopes.size() - 1;
while (FSIndex != MaxFunctionScopesIndex) {
DC = getLambdaAwareParentOfDeclContext(DC);
--FSIndex;
}
}
// Capture global variables if a private copy of the variable is
// required.
bool IsGlobal = !VD->hasLocalStorage();
if (IsGlobal && !(LangOpts.OpenMP &&
OpenMP().isOpenMPCapturedDecl(Var, /*CheckScopeInfo=*/true,
MaxFunctionScopesIndex)))
return true;
if (isa<VarDecl>(Var))
Var = cast<VarDecl>(Var->getCanonicalDecl());
// Walk up the stack to determine whether we can capture the variable,
// performing the "simple" checks that don't depend on type. We stop when
// we've either hit the declared scope of the variable or find an existing
// capture of that variable. We start from the innermost capturing-entity
// (the DC) and ensure that all intervening capturing-entities
// (blocks/lambdas etc.) between the innermost capturer and the variable's
// DeclContext can either capture the variable or have already captured
// the variable.
CaptureType = Var->getType();
DeclRefType = CaptureType.getNonReferenceType();
bool Nested = false;
bool Explicit = (Kind != TryCapture_Implicit);
unsigned FunctionScopesIndex = MaxFunctionScopesIndex;
do {
LambdaScopeInfo *LSI = nullptr;
if (!FunctionScopes.empty())
LSI = dyn_cast_or_null<LambdaScopeInfo>(
FunctionScopes[FunctionScopesIndex]);
bool IsInScopeDeclarationContext =
!LSI || LSI->AfterParameterList || CurContext == LSI->CallOperator;
if (LSI && !LSI->AfterParameterList) {
// This allows capturing parameters from a default value, which does
// not seem correct.
if (isa<ParmVarDecl>(Var) && !Var->getDeclContext()->isFunctionOrMethod())
return true;
}
// If the variable is declared in the current context, there is no need to
// capture it.
if (IsInScopeDeclarationContext &&
FunctionScopesIndex == MaxFunctionScopesIndex && VarDC == DC)
return true;
// Only block literals, captured statements, and lambda expressions can
// capture; other scopes don't work.
DeclContext *ParentDC =
!IsInScopeDeclarationContext
? DC->getParent()
: getParentOfCapturingContextOrNull(DC, Var, ExprLoc,
BuildAndDiagnose, *this);
// We need to check for the parent *first* because, if we *have*
// private-captured a global variable, we need to recursively capture it in
// intermediate blocks, lambdas, etc.
if (!ParentDC) {
if (IsGlobal) {
FunctionScopesIndex = MaxFunctionScopesIndex - 1;
break;
}
return true;
}
FunctionScopeInfo *FSI = FunctionScopes[FunctionScopesIndex];
CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FSI);
// Check whether we've already captured it.
if (isVariableAlreadyCapturedInScopeInfo(CSI, Var, Nested, CaptureType,
DeclRefType)) {
CSI->getCapture(Var).markUsed(BuildAndDiagnose);
break;
}
// When evaluating some attributes (like enable_if) we might refer to a
// function parameter appertaining to the same declaration as that
// attribute.
if (const auto *Parm = dyn_cast<ParmVarDecl>(Var);
Parm && Parm->getDeclContext() == DC)
return true;
// If we are instantiating a generic lambda call operator body,
// we do not want to capture new variables. What was captured
// during either a lambda's transformation or initial parsing
// should be used.
if (isGenericLambdaCallOperatorSpecialization(DC)) {
if (BuildAndDiagnose) {
LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI);
if (LSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None) {
Diag(ExprLoc, diag::err_lambda_impcap) << Var;
Diag(Var->getLocation(), diag::note_previous_decl) << Var;
Diag(LSI->Lambda->getBeginLoc(), diag::note_lambda_decl);
buildLambdaCaptureFixit(*this, LSI, Var);
} else
diagnoseUncapturableValueReferenceOrBinding(*this, ExprLoc, Var);
}
return true;
}
// Try to capture variable-length array types.
if (Var->getType()->isVariablyModifiedType()) {
// We're going to walk down into the type and look for VLA
// expressions.
QualType QTy = Var->getType();
if (ParmVarDecl *PVD = dyn_cast_or_null<ParmVarDecl>(Var))
QTy = PVD->getOriginalType();
captureVariablyModifiedType(Context, QTy, CSI);
}
if (getLangOpts().OpenMP) {
if (auto *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
// OpenMP private variables should not be captured in outer scope, so
// just break here. Similarly, global variables that are captured in a
// target region should not be captured outside the scope of the region.
if (RSI->CapRegionKind == CR_OpenMP) {
// FIXME: We should support capturing structured bindings in OpenMP.
if (isa<BindingDecl>(Var)) {
if (BuildAndDiagnose) {
Diag(ExprLoc, diag::err_capture_binding_openmp) << Var;
Diag(Var->getLocation(), diag::note_entity_declared_at) << Var;
}
return true;
}
OpenMPClauseKind IsOpenMPPrivateDecl = OpenMP().isOpenMPPrivateDecl(
Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel);
// If the variable is private (i.e. not captured) and has variably
// modified type, we still need to capture the type for correct
// codegen in all regions, associated with the construct. Currently,
// it is captured in the innermost captured region only.
if (IsOpenMPPrivateDecl != OMPC_unknown &&
Var->getType()->isVariablyModifiedType()) {
QualType QTy = Var->getType();
if (ParmVarDecl *PVD = dyn_cast_or_null<ParmVarDecl>(Var))
QTy = PVD->getOriginalType();
for (int I = 1,
E = OpenMP().getNumberOfConstructScopes(RSI->OpenMPLevel);
I < E; ++I) {
auto *OuterRSI = cast<CapturedRegionScopeInfo>(
FunctionScopes[FunctionScopesIndex - I]);
assert(RSI->OpenMPLevel == OuterRSI->OpenMPLevel &&
"Wrong number of captured regions associated with the "
"OpenMP construct.");
captureVariablyModifiedType(Context, QTy, OuterRSI);
}
}
bool IsTargetCap =
IsOpenMPPrivateDecl != OMPC_private &&
OpenMP().isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel,
RSI->OpenMPCaptureLevel);
// Do not capture global if it is not privatized in outer regions.
bool IsGlobalCap =
IsGlobal && OpenMP().isOpenMPGlobalCapturedDecl(
Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel);
// When we detect target captures we are looking from inside the
// target region, so we need to propagate the capture from the
// enclosing region. Therefore, the capture is not initially nested.
if (IsTargetCap)
OpenMP().adjustOpenMPTargetScopeIndex(FunctionScopesIndex,
RSI->OpenMPLevel);
if (IsTargetCap || IsOpenMPPrivateDecl == OMPC_private ||
(IsGlobal && !IsGlobalCap)) {
Nested = !IsTargetCap;
bool HasConst = DeclRefType.isConstQualified();
DeclRefType = DeclRefType.getUnqualifiedType();
// Don't lose diagnostics about assignments to const.
if (HasConst)
DeclRefType.addConst();
CaptureType = Context.getLValueReferenceType(DeclRefType);
break;
}
}
}
}
if (CSI->ImpCaptureStyle == CapturingScopeInfo::ImpCap_None && !Explicit) {
// No capture-default, and this is not an explicit capture
// so cannot capture this variable.
if (BuildAndDiagnose) {
Diag(ExprLoc, diag::err_lambda_impcap) << Var;
Diag(Var->getLocation(), diag::note_previous_decl) << Var;
auto *LSI = cast<LambdaScopeInfo>(CSI);
if (LSI->Lambda) {
Diag(LSI->Lambda->getBeginLoc(), diag::note_lambda_decl);
buildLambdaCaptureFixit(*this, LSI, Var);
}
// FIXME: If we error out because an outer lambda cannot implicitly
// capture a variable that an inner lambda explicitly captures, we
// should have the inner lambda do the explicit capture - because
// it makes for cleaner diagnostics later. This would purely be done
// so that the diagnostic does not misleadingly claim that a variable
// cannot be captured by a lambda implicitly even though it is
// captured explicitly. Suggestion:
// - create const bool VariableCaptureWasInitiallyExplicit = Explicit
// at the function head
// - cache the StartingDeclContext - this must be a lambda
// - captureInLambda in the innermost lambda the variable.
}
return true;
}
Explicit = false;
FunctionScopesIndex--;
if (IsInScopeDeclarationContext)
DC = ParentDC;
} while (!VarDC->Equals(DC));
// Walk back down the scope stack, (e.g. from outer lambda to inner lambda)
// computing the type of the capture at each step, checking type-specific
// requirements, and adding captures if requested.
// If the variable had already been captured previously, we start capturing
// at the lambda nested within that one.
bool Invalid = false;
for (unsigned I = ++FunctionScopesIndex, N = MaxFunctionScopesIndex + 1; I != N;
++I) {
CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FunctionScopes[I]);
// Certain capturing entities (lambdas, blocks etc.) are not allowed to
// capture certain types of variables (unnamed, variably-modified types
// etc.), so check for eligibility.
if (!Invalid)
Invalid =
!isVariableCapturable(CSI, Var, ExprLoc, BuildAndDiagnose, *this);
// After encountering an error, if we're actually supposed to capture, keep
// capturing in nested contexts to suppress any follow-on diagnostics.
if (Invalid && !BuildAndDiagnose)
return true;
if (BlockScopeInfo *BSI = dyn_cast<BlockScopeInfo>(CSI)) {
Invalid = !captureInBlock(BSI, Var, ExprLoc, BuildAndDiagnose, CaptureType,
DeclRefType, Nested, *this, Invalid);
Nested = true;
} else if (CapturedRegionScopeInfo *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
Invalid = !captureInCapturedRegion(
RSI, Var, ExprLoc, BuildAndDiagnose, CaptureType, DeclRefType, Nested,
Kind, /*IsTopScope*/ I == N - 1, *this, Invalid);
Nested = true;
} else {
LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI);
Invalid =
!captureInLambda(LSI, Var, ExprLoc, BuildAndDiagnose, CaptureType,
DeclRefType, Nested, Kind, EllipsisLoc,
/*IsTopScope*/ I == N - 1, *this, Invalid);
Nested = true;
}
if (Invalid && !BuildAndDiagnose)
return true;
}
return Invalid;
}
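// Illustrative sketch (hypothetical user code): the two-phase walk above
// captures transitively through every intervening lambda:
//
//   void f() {
//     int x = 0;
//     auto outer = [&] {
//       auto inner = [&] { return x; };  // 'x' is captured by 'inner' and,
//       return inner();                  // on the walk back down, by 'outer'
//     };
//   }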
bool Sema::tryCaptureVariable(ValueDecl *Var, SourceLocation Loc,
TryCaptureKind Kind, SourceLocation EllipsisLoc) {
QualType CaptureType;
QualType DeclRefType;
return tryCaptureVariable(Var, Loc, Kind, EllipsisLoc,
/*BuildAndDiagnose=*/true, CaptureType,
DeclRefType, nullptr);
}
bool Sema::NeedToCaptureVariable(ValueDecl *Var, SourceLocation Loc) {
QualType CaptureType;
QualType DeclRefType;
return !tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(),
/*BuildAndDiagnose=*/false, CaptureType,
DeclRefType, nullptr);
}
QualType Sema::getCapturedDeclRefType(ValueDecl *Var, SourceLocation Loc) {
QualType CaptureType;
QualType DeclRefType;
// Determine whether we can capture this variable.
if (tryCaptureVariable(Var, Loc, TryCapture_Implicit, SourceLocation(),
/*BuildAndDiagnose=*/false, CaptureType,
DeclRefType, nullptr))
return QualType();
return DeclRefType;
}
namespace {
// Helper to copy the template arguments from a DeclRefExpr or MemberExpr.
// The produced TemplateArgumentListInfo* points to data stored within this
// object, so should only be used in contexts where the pointer will not be
// used after the CopiedTemplateArgs object is destroyed.
class CopiedTemplateArgs {
bool HasArgs;
TemplateArgumentListInfo TemplateArgStorage;
public:
template<typename RefExpr>
CopiedTemplateArgs(RefExpr *E) : HasArgs(E->hasExplicitTemplateArgs()) {
if (HasArgs)
E->copyTemplateArgumentsInto(TemplateArgStorage);
}
operator TemplateArgumentListInfo*()
#ifdef __has_cpp_attribute
#if __has_cpp_attribute(clang::lifetimebound)
[[clang::lifetimebound]]
#endif
#endif
{
return HasArgs ? &TemplateArgStorage : nullptr;
}
};
}
/// Walk the set of potential results of an expression and mark them all as
/// non-odr-uses if they satisfy the side-conditions of the NonOdrUseReason.
///
/// \return A new expression if we found any potential results, ExprEmpty() if
/// not, and ExprError() if we diagnosed an error.
static ExprResult rebuildPotentialResultsAsNonOdrUsed(Sema &S, Expr *E,
NonOdrUseReason NOUR) {
// Per C++11 [basic.def.odr], a variable is odr-used "unless it is
// an object that satisfies the requirements for appearing in a
// constant expression (5.19) and the lvalue-to-rvalue conversion (4.1)
// is immediately applied." This function handles the lvalue-to-rvalue
// conversion part.
//
// If we encounter a node that claims to be an odr-use but shouldn't be, we
// transform it into the relevant kind of non-odr-use node and rebuild the
// tree of nodes leading to it.
//
// This is a mini-TreeTransform that only transforms a restricted subset of
// nodes (and only certain operands of them).
// Rebuild a subexpression.
auto Rebuild = [&](Expr *Sub) {
return rebuildPotentialResultsAsNonOdrUsed(S, Sub, NOUR);
};
// Check whether a potential result satisfies the requirements of NOUR.
auto IsPotentialResultOdrUsed = [&](NamedDecl *D) {
// Any entity other than a VarDecl is always odr-used whenever it's named
// in a potentially-evaluated expression.
auto *VD = dyn_cast<VarDecl>(D);
if (!VD)
return true;
// C++2a [basic.def.odr]p4:
// A variable x whose name appears as a potentially-evaluated expression
// e is odr-used by e unless
// -- x is a reference that is usable in constant expressions, or
// -- x is a variable of non-reference type that is usable in constant
// expressions and has no mutable subobjects, and e is an element of
// the set of potential results of an expression of
// non-volatile-qualified non-class type to which the lvalue-to-rvalue
// conversion is applied, or
// -- x is a variable of non-reference type, and e is an element of the
// set of potential results of a discarded-value expression to which
// the lvalue-to-rvalue conversion is not applied
//
// We check the first bullet and the "potentially-evaluated" condition in
// BuildDeclRefExpr. We check the type requirements in the second bullet
// in CheckLValueToRValueConversionOperand below.
switch (NOUR) {
case NOUR_None:
case NOUR_Unevaluated:
llvm_unreachable("unexpected non-odr-use-reason");
case NOUR_Constant:
// Constant references were handled when they were built.
if (VD->getType()->isReferenceType())
return true;
if (auto *RD = VD->getType()->getAsCXXRecordDecl())
if (RD->hasMutableFields())
return true;
if (!VD->isUsableInConstantExpressions(S.Context))
return true;
break;
case NOUR_Discarded:
if (VD->getType()->isReferenceType())
return true;
break;
}
return false;
};
// Mark that this expression does not constitute an odr-use.
auto MarkNotOdrUsed = [&] {
S.MaybeODRUseExprs.remove(E);
if (LambdaScopeInfo *LSI = S.getCurLambda())
LSI->markVariableExprAsNonODRUsed(E);
};
// C++2a [basic.def.odr]p2:
// The set of potential results of an expression e is defined as follows:
switch (E->getStmtClass()) {
// -- If e is an id-expression, ...
case Expr::DeclRefExprClass: {
auto *DRE = cast<DeclRefExpr>(E);
if (DRE->isNonOdrUse() || IsPotentialResultOdrUsed(DRE->getDecl()))
break;
// Rebuild as a non-odr-use DeclRefExpr.
MarkNotOdrUsed();
return DeclRefExpr::Create(
S.Context, DRE->getQualifierLoc(), DRE->getTemplateKeywordLoc(),
DRE->getDecl(), DRE->refersToEnclosingVariableOrCapture(),
DRE->getNameInfo(), DRE->getType(), DRE->getValueKind(),
DRE->getFoundDecl(), CopiedTemplateArgs(DRE), NOUR);
}
case Expr::FunctionParmPackExprClass: {
auto *FPPE = cast<FunctionParmPackExpr>(E);
// If any of the declarations in the pack is odr-used, then the expression
// as a whole constitutes an odr-use.
for (VarDecl *D : *FPPE)
if (IsPotentialResultOdrUsed(D))
return ExprEmpty();
// FIXME: Rebuild as a non-odr-use FunctionParmPackExpr? In practice,
// nothing cares about whether we marked this as an odr-use, but it might
// be useful for non-compiler tools.
MarkNotOdrUsed();
break;
}
// -- If e is a subscripting operation with an array operand...
case Expr::ArraySubscriptExprClass: {
auto *ASE = cast<ArraySubscriptExpr>(E);
Expr *OldBase = ASE->getBase()->IgnoreImplicit();
if (!OldBase->getType()->isArrayType())
break;
ExprResult Base = Rebuild(OldBase);
if (!Base.isUsable())
return Base;
Expr *LHS = ASE->getBase() == ASE->getLHS() ? Base.get() : ASE->getLHS();
Expr *RHS = ASE->getBase() == ASE->getRHS() ? Base.get() : ASE->getRHS();
SourceLocation LBracketLoc = ASE->getBeginLoc(); // FIXME: Not stored.
return S.ActOnArraySubscriptExpr(nullptr, LHS, LBracketLoc, RHS,
ASE->getRBracketLoc());
}
case Expr::MemberExprClass: {
auto *ME = cast<MemberExpr>(E);
// -- If e is a class member access expression [...] naming a non-static
// data member...
if (isa<FieldDecl>(ME->getMemberDecl())) {
ExprResult Base = Rebuild(ME->getBase());
if (!Base.isUsable())
return Base;
return MemberExpr::Create(
S.Context, Base.get(), ME->isArrow(), ME->getOperatorLoc(),
ME->getQualifierLoc(), ME->getTemplateKeywordLoc(),
ME->getMemberDecl(), ME->getFoundDecl(), ME->getMemberNameInfo(),
CopiedTemplateArgs(ME), ME->getType(), ME->getValueKind(),
ME->getObjectKind(), ME->isNonOdrUse());
}
if (ME->getMemberDecl()->isCXXInstanceMember())
break;
// -- If e is a class member access expression naming a static data member,
// ...
if (ME->isNonOdrUse() || IsPotentialResultOdrUsed(ME->getMemberDecl()))
break;
// Rebuild as a non-odr-use MemberExpr.
MarkNotOdrUsed();
return MemberExpr::Create(
S.Context, ME->getBase(), ME->isArrow(), ME->getOperatorLoc(),
ME->getQualifierLoc(), ME->getTemplateKeywordLoc(), ME->getMemberDecl(),
ME->getFoundDecl(), ME->getMemberNameInfo(), CopiedTemplateArgs(ME),
ME->getType(), ME->getValueKind(), ME->getObjectKind(), NOUR);
}
case Expr::BinaryOperatorClass: {
auto *BO = cast<BinaryOperator>(E);
Expr *LHS = BO->getLHS();
Expr *RHS = BO->getRHS();
// -- If e is a pointer-to-member expression of the form e1 .* e2 ...
if (BO->getOpcode() == BO_PtrMemD) {
ExprResult Sub = Rebuild(LHS);
if (!Sub.isUsable())
return Sub;
BO->setLHS(Sub.get());
// -- If e is a comma expression, ...
} else if (BO->getOpcode() == BO_Comma) {
ExprResult Sub = Rebuild(RHS);
if (!Sub.isUsable())
return Sub;
BO->setRHS(Sub.get());
} else {
break;
}
return ExprResult(BO);
}
// -- If e has the form (e1)...
case Expr::ParenExprClass: {
auto *PE = cast<ParenExpr>(E);
ExprResult Sub = Rebuild(PE->getSubExpr());
if (!Sub.isUsable())
return Sub;
return S.ActOnParenExpr(PE->getLParen(), PE->getRParen(), Sub.get());
}
// -- If e is a glvalue conditional expression, ...
// We don't apply this to a binary conditional operator. FIXME: Should we?
case Expr::ConditionalOperatorClass: {
auto *CO = cast<ConditionalOperator>(E);
ExprResult LHS = Rebuild(CO->getLHS());
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = Rebuild(CO->getRHS());
if (RHS.isInvalid())
return ExprError();
if (!LHS.isUsable() && !RHS.isUsable())
return ExprEmpty();
if (!LHS.isUsable())
LHS = CO->getLHS();
if (!RHS.isUsable())
RHS = CO->getRHS();
return S.ActOnConditionalOp(CO->getQuestionLoc(), CO->getColonLoc(),
CO->getCond(), LHS.get(), RHS.get());
}
// [Clang extension]
// -- If e has the form __extension__ e1...
case Expr::UnaryOperatorClass: {
auto *UO = cast<UnaryOperator>(E);
if (UO->getOpcode() != UO_Extension)
break;
ExprResult Sub = Rebuild(UO->getSubExpr());
if (!Sub.isUsable())
return Sub;
return S.BuildUnaryOp(nullptr, UO->getOperatorLoc(), UO_Extension,
Sub.get());
}
// [Clang extension]
// -- If e has the form _Generic(...), the set of potential results is the
// union of the sets of potential results of the associated expressions.
case Expr::GenericSelectionExprClass: {
auto *GSE = cast<GenericSelectionExpr>(E);
SmallVector<Expr *, 4> AssocExprs;
bool AnyChanged = false;
for (Expr *OrigAssocExpr : GSE->getAssocExprs()) {
ExprResult AssocExpr = Rebuild(OrigAssocExpr);
if (AssocExpr.isInvalid())
return ExprError();
if (AssocExpr.isUsable()) {
AssocExprs.push_back(AssocExpr.get());
AnyChanged = true;
} else {
AssocExprs.push_back(OrigAssocExpr);
}
}
void *ExOrTy = nullptr;
bool IsExpr = GSE->isExprPredicate();
if (IsExpr)
ExOrTy = GSE->getControllingExpr();
else
ExOrTy = GSE->getControllingType();
return AnyChanged ? S.CreateGenericSelectionExpr(
GSE->getGenericLoc(), GSE->getDefaultLoc(),
GSE->getRParenLoc(), IsExpr, ExOrTy,
GSE->getAssocTypeSourceInfos(), AssocExprs)
: ExprEmpty();
}
// [Clang extension]
// -- If e has the form __builtin_choose_expr(...), the set of potential
// results is the union of the sets of potential results of the
// second and third subexpressions.
case Expr::ChooseExprClass: {
auto *CE = cast<ChooseExpr>(E);
ExprResult LHS = Rebuild(CE->getLHS());
if (LHS.isInvalid())
return ExprError();
ExprResult RHS = Rebuild(CE->getRHS());
if (RHS.isInvalid())
return ExprError();
if (!LHS.get() && !RHS.get())
return ExprEmpty();
if (!LHS.isUsable())
LHS = CE->getLHS();
if (!RHS.isUsable())
RHS = CE->getRHS();
return S.ActOnChooseExpr(CE->getBuiltinLoc(), CE->getCond(), LHS.get(),
RHS.get(), CE->getRParenLoc());
}
// Step through non-syntactic nodes.
case Expr::ConstantExprClass: {
auto *CE = cast<ConstantExpr>(E);
ExprResult Sub = Rebuild(CE->getSubExpr());
if (!Sub.isUsable())
return Sub;
return ConstantExpr::Create(S.Context, Sub.get());
}
// We could mostly rely on the recursive rebuilding to rebuild implicit
// casts, but not at the top level, so rebuild them here.
case Expr::ImplicitCastExprClass: {
auto *ICE = cast<ImplicitCastExpr>(E);
// Only step through the narrow set of cast kinds we expect to encounter.
// Anything else suggests we've left the region in which potential results
// can be found.
switch (ICE->getCastKind()) {
case CK_NoOp:
case CK_DerivedToBase:
case CK_UncheckedDerivedToBase: {
ExprResult Sub = Rebuild(ICE->getSubExpr());
if (!Sub.isUsable())
return Sub;
CXXCastPath Path(ICE->path());
return S.ImpCastExprToType(Sub.get(), ICE->getType(), ICE->getCastKind(),
ICE->getValueKind(), &Path);
}
default:
break;
}
break;
}
default:
break;
}
// Can't traverse through this node. Nothing to do.
return ExprEmpty();
}
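// Illustrative sketch (hypothetical user code): a constant that is only read
// is rebuilt as a non-odr-use, so no definition or capture is required:
//
//   const int N = 5;
//   void g() {
//     auto l = [] { return N; };  // OK without capturing N: the
//   }                             // lvalue-to-rvalue conversion is applied
//                                 // and N is usable in constant expressions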
ExprResult Sema::CheckLValueToRValueConversionOperand(Expr *E) {
// Check whether the operand is or contains an object of non-trivial C union
// type.
if (E->getType().isVolatileQualified() &&
(E->getType().hasNonTrivialToPrimitiveDestructCUnion() ||
E->getType().hasNonTrivialToPrimitiveCopyCUnion()))
checkNonTrivialCUnion(E->getType(), E->getExprLoc(),
Sema::NTCUC_LValueToRValueVolatile,
NTCUK_Destruct|NTCUK_Copy);
// C++2a [basic.def.odr]p4:
// [...] an expression of non-volatile-qualified non-class type to which
// the lvalue-to-rvalue conversion is applied [...]
if (E->getType().isVolatileQualified() || E->getType()->getAs<RecordType>())
return E;
ExprResult Result =
rebuildPotentialResultsAsNonOdrUsed(*this, E, NOUR_Constant);
if (Result.isInvalid())
return ExprError();
return Result.get() ? Result : E;
}
ExprResult Sema::ActOnConstantExpression(ExprResult Res) {
Res = CorrectDelayedTyposInExpr(Res);
if (!Res.isUsable())
return Res;
// If a constant-expression is a reference to a variable where we delay
// deciding whether it is an odr-use, just assume we will apply the
// lvalue-to-rvalue conversion. In the one case where this doesn't happen
// (a non-type template argument), we have special handling anyway.
return CheckLValueToRValueConversionOperand(Res.get());
}
void Sema::CleanupVarDeclMarking() {
// Iterate through a local copy in case MarkVarDeclODRUsed makes a recursive
// call.
MaybeODRUseExprSet LocalMaybeODRUseExprs;
std::swap(LocalMaybeODRUseExprs, MaybeODRUseExprs);
for (Expr *E : LocalMaybeODRUseExprs) {
if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
MarkVarDeclODRUsed(cast<VarDecl>(DRE->getDecl()),
DRE->getLocation(), *this);
} else if (auto *ME = dyn_cast<MemberExpr>(E)) {
MarkVarDeclODRUsed(cast<VarDecl>(ME->getMemberDecl()), ME->getMemberLoc(),
*this);
} else if (auto *FP = dyn_cast<FunctionParmPackExpr>(E)) {
for (VarDecl *VD : *FP)
MarkVarDeclODRUsed(VD, FP->getParameterPackLocation(), *this);
} else {
llvm_unreachable("Unexpected expression");
}
}
assert(MaybeODRUseExprs.empty() &&
"MarkVarDeclODRUsed failed to cleanup MaybeODRUseExprs?");
}
static void DoMarkPotentialCapture(Sema &SemaRef, SourceLocation Loc,
ValueDecl *Var, Expr *E) {
VarDecl *VD = Var->getPotentiallyDecomposedVarDecl();
if (!VD)
return;
const bool RefersToEnclosingScope =
(SemaRef.CurContext != VD->getDeclContext() &&
VD->getDeclContext()->isFunctionOrMethod() && VD->hasLocalStorage());
if (RefersToEnclosingScope) {
LambdaScopeInfo *const LSI =
SemaRef.getCurLambda(/*IgnoreNonLambdaCapturingScope=*/true);
if (LSI && (!LSI->CallOperator ||
!LSI->CallOperator->Encloses(Var->getDeclContext()))) {
// If a variable could potentially be odr-used, defer marking it as
// such until we finish analyzing the full expression for any
// lvalue-to-rvalue or discarded-value conversions that would obviate
// odr-use.
// Add it to the list of potential captures that will be analyzed
// later (ActOnFinishFullExpr) for eventual capture and odr-use marking
// unless the variable is a reference that was initialized by a constant
// expression (this will never need to be captured or odr-used).
//
// FIXME: We can simplify this a lot after implementing P0588R1.
assert(E && "Capture variable should be used in an expression.");
if (!Var->getType()->isReferenceType() ||
!VD->isUsableInConstantExpressions(SemaRef.Context))
LSI->addPotentialCapture(E->IgnoreParens());
}
}
}
static void DoMarkVarDeclReferenced(
Sema &SemaRef, SourceLocation Loc, VarDecl *Var, Expr *E,
llvm::DenseMap<const VarDecl *, int> &RefsMinusAssignments) {
assert((!E || isa<DeclRefExpr>(E) || isa<MemberExpr>(E) ||
isa<FunctionParmPackExpr>(E)) &&
"Invalid Expr argument to DoMarkVarDeclReferenced");
Var->setReferenced();
if (Var->isInvalidDecl())
return;
auto *MSI = Var->getMemberSpecializationInfo();
TemplateSpecializationKind TSK = MSI ? MSI->getTemplateSpecializationKind()
: Var->getTemplateSpecializationKind();
OdrUseContext OdrUse = isOdrUseContext(SemaRef);
bool UsableInConstantExpr =
Var->mightBeUsableInConstantExpressions(SemaRef.Context);
if (Var->isLocalVarDeclOrParm() && !Var->hasExternalStorage()) {
RefsMinusAssignments.insert({Var, 0}).first->getSecond()++;
}
// C++20 [expr.const]p12:
// A variable [...] is needed for constant evaluation if it is [...] a
// variable whose name appears as a potentially constant evaluated
// expression that is either a constexpr variable or is of non-volatile
// const-qualified integral type or of reference type.
bool NeededForConstantEvaluation =
isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr;
bool NeedDefinition =
OdrUse == OdrUseContext::Used || NeededForConstantEvaluation;
assert(!isa<VarTemplatePartialSpecializationDecl>(Var) &&
"Can't instantiate a partial template specialization.");
// If this might be a member specialization of a static data member, check
// the specialization is visible. We already did the checks for variable
// template specializations when we created them.
if (NeedDefinition && TSK != TSK_Undeclared &&
!isa<VarTemplateSpecializationDecl>(Var))
SemaRef.checkSpecializationVisibility(Loc, Var);
// Perform implicit instantiation of static data members, static data member
// templates of class templates, and variable template specializations. Delay
// instantiations of variable templates, except for those that could be used
// in a constant expression.
if (NeedDefinition && isTemplateInstantiation(TSK)) {
// Per C++17 [temp.explicit]p10, we may instantiate despite an explicit
// instantiation declaration if a variable is usable in a constant
// expression (among other cases).
bool TryInstantiating =
TSK == TSK_ImplicitInstantiation ||
(TSK == TSK_ExplicitInstantiationDeclaration && UsableInConstantExpr);
if (TryInstantiating) {
SourceLocation PointOfInstantiation =
MSI ? MSI->getPointOfInstantiation() : Var->getPointOfInstantiation();
bool FirstInstantiation = PointOfInstantiation.isInvalid();
if (FirstInstantiation) {
PointOfInstantiation = Loc;
if (MSI)
MSI->setPointOfInstantiation(PointOfInstantiation);
// FIXME: Notify listener.
else
Var->setTemplateSpecializationKind(TSK, PointOfInstantiation);
}
if (UsableInConstantExpr) {
// Do not defer instantiations of variables that could be used in a
// constant expression.
SemaRef.runWithSufficientStackSpace(PointOfInstantiation, [&] {
SemaRef.InstantiateVariableDefinition(PointOfInstantiation, Var);
});
// Re-set the member to trigger a recomputation of the dependence bits
// for the expression.
if (auto *DRE = dyn_cast_or_null<DeclRefExpr>(E))
DRE->setDecl(DRE->getDecl());
else if (auto *ME = dyn_cast_or_null<MemberExpr>(E))
ME->setMemberDecl(ME->getMemberDecl());
} else if (FirstInstantiation) {
SemaRef.PendingInstantiations
.push_back(std::make_pair(Var, PointOfInstantiation));
} else {
bool Inserted = false;
for (auto &I : SemaRef.SavedPendingInstantiations) {
auto Iter = llvm::find_if(
I, [Var](const Sema::PendingImplicitInstantiation &P) {
return P.first == Var;
});
if (Iter != I.end()) {
SemaRef.PendingInstantiations.push_back(*Iter);
I.erase(Iter);
Inserted = true;
break;
}
}
// FIXME: For a specialization of a variable template, we don't
// distinguish between "declaration and type implicitly instantiated"
// and "implicit instantiation of definition requested", so we have
// no direct way to avoid enqueueing the pending instantiation
// multiple times.
if (isa<VarTemplateSpecializationDecl>(Var) && !Inserted)
SemaRef.PendingInstantiations
.push_back(std::make_pair(Var, PointOfInstantiation));
}
}
}
// C++2a [basic.def.odr]p4:
// A variable x whose name appears as a potentially-evaluated expression e
// is odr-used by e unless
// -- x is a reference that is usable in constant expressions
// -- x is a variable of non-reference type that is usable in constant
// expressions and has no mutable subobjects [FIXME], and e is an
// element of the set of potential results of an expression of
// non-volatile-qualified non-class type to which the lvalue-to-rvalue
// conversion is applied
// -- x is a variable of non-reference type, and e is an element of the set
// of potential results of a discarded-value expression to which the
// lvalue-to-rvalue conversion is not applied [FIXME]
//
// We check the first part of the second bullet here, and
// Sema::CheckLValueToRValueConversionOperand deals with the second part.
// FIXME: To get the third bullet right, we need to delay this even for
// variables that are not usable in constant expressions.
// If we already know this isn't an odr-use, there's nothing more to do.
if (DeclRefExpr *DRE = dyn_cast_or_null<DeclRefExpr>(E))
if (DRE->isNonOdrUse())
return;
if (MemberExpr *ME = dyn_cast_or_null<MemberExpr>(E))
if (ME->isNonOdrUse())
return;
switch (OdrUse) {
case OdrUseContext::None:
// In some cases, a variable may not have been marked unevaluated, if it
// appears in a default initializer.
assert((!E || isa<FunctionParmPackExpr>(E) ||
SemaRef.isUnevaluatedContext()) &&
"missing non-odr-use marking for unevaluated decl ref");
break;
case OdrUseContext::FormallyOdrUsed:
// FIXME: Ignoring formal odr-uses results in incorrect lambda capture
// behavior.
break;
case OdrUseContext::Used:
// If we might later find that this expression isn't actually an odr-use,
// delay the marking.
if (E && Var->isUsableInConstantExpressions(SemaRef.Context))
SemaRef.MaybeODRUseExprs.insert(E);
else
MarkVarDeclODRUsed(Var, Loc, SemaRef);
break;
case OdrUseContext::Dependent:
// If this is a dependent context, we don't need to mark variables as
// odr-used, but we may still need to track them for lambda capture.
// FIXME: Do we also need to do this inside dependent typeid expressions
// (which are modeled as unevaluated at this point)?
DoMarkPotentialCapture(SemaRef, Loc, Var, E);
break;
}
}
static void DoMarkBindingDeclReferenced(Sema &SemaRef, SourceLocation Loc,
BindingDecl *BD, Expr *E) {
BD->setReferenced();
if (BD->isInvalidDecl())
return;
OdrUseContext OdrUse = isOdrUseContext(SemaRef);
if (OdrUse == OdrUseContext::Used) {
QualType CaptureType, DeclRefType;
SemaRef.tryCaptureVariable(BD, Loc, Sema::TryCapture_Implicit,
/*EllipsisLoc*/ SourceLocation(),
/*BuildAndDiagnose*/ true, CaptureType,
DeclRefType,
/*FunctionScopeIndexToStopAt*/ nullptr);
} else if (OdrUse == OdrUseContext::Dependent) {
DoMarkPotentialCapture(SemaRef, Loc, BD, E);
}
}
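// Illustrative sketch (hypothetical user code): an odr-used structured
// binding is routed through tryCaptureVariable above:
//
//   auto [a, b] = std::pair{1, 2};
//   auto l = [a] { return a; };  // OK in C++20; an extension in earlier
//                                // modes (see ext_capture_binding above)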
void Sema::MarkVariableReferenced(SourceLocation Loc, VarDecl *Var) {
DoMarkVarDeclReferenced(*this, Loc, Var, nullptr, RefsMinusAssignments);
}
// C++ [temp.dep.expr]p3:
// An id-expression is type-dependent if it contains:
// - an identifier associated by name lookup with an entity captured by copy
// in a lambda-expression that has an explicit object parameter whose type
// is dependent ([dcl.fct]),
static void FixDependencyOfIdExpressionsInLambdaWithDependentObjectParameter(
Sema &SemaRef, ValueDecl *D, Expr *E) {
auto *ID = dyn_cast<DeclRefExpr>(E);
if (!ID || ID->isTypeDependent() || !ID->refersToEnclosingVariableOrCapture())
return;
// If any enclosing lambda with a dependent explicit object parameter either
// explicitly captures the variable by value, or has a capture default of '='
// and does not capture the variable by reference, then the type of the DRE
// is dependent on the type of that lambda's explicit object parameter.
auto IsDependent = [&]() {
for (auto *Scope : llvm::reverse(SemaRef.FunctionScopes)) {
auto *LSI = dyn_cast<sema::LambdaScopeInfo>(Scope);
if (!LSI)
continue;
if (LSI->Lambda && !LSI->Lambda->Encloses(SemaRef.CurContext) &&
LSI->AfterParameterList)
return false;
const auto *MD = LSI->CallOperator;
if (MD->getType().isNull())
continue;
const auto *Ty = MD->getType()->getAs<FunctionProtoType>();
if (!Ty || !MD->isExplicitObjectMemberFunction() ||
!Ty->getParamType(0)->isDependentType())
continue;
if (auto *C = LSI->CaptureMap.count(D) ? &LSI->getCapture(D) : nullptr) {
if (C->isCopyCapture())
return true;
continue;
}
if (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByval)
return true;
}
return false;
}();
ID->setCapturedByCopyInLambdaWithExplicitObjectParameter(
IsDependent, SemaRef.getASTContext());
}
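// Illustrative sketch (hypothetical C++23 user code): with a dependent
// explicit object parameter, a by-copy capture makes the id-expression
// type-dependent, which the function above records on the DeclRefExpr:
//
//   int x = 0;
//   auto l = [x](this auto self) { return x; };  // type of 'x' in the body
//                                                // depends on decltype(self)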
static void
MarkExprReferenced(Sema &SemaRef, SourceLocation Loc, Decl *D, Expr *E,
bool MightBeOdrUse,
llvm::DenseMap<const VarDecl *, int> &RefsMinusAssignments) {
if (SemaRef.OpenMP().isInOpenMPDeclareTargetContext())
SemaRef.OpenMP().checkDeclIsAllowedInOpenMPTarget(E, D);
if (VarDecl *Var = dyn_cast<VarDecl>(D)) {
DoMarkVarDeclReferenced(SemaRef, Loc, Var, E, RefsMinusAssignments);
if (SemaRef.getLangOpts().CPlusPlus)
FixDependencyOfIdExpressionsInLambdaWithDependentObjectParameter(SemaRef,
Var, E);
return;
}
if (BindingDecl *Decl = dyn_cast<BindingDecl>(D)) {
DoMarkBindingDeclReferenced(SemaRef, Loc, Decl, E);
if (SemaRef.getLangOpts().CPlusPlus)
FixDependencyOfIdExpressionsInLambdaWithDependentObjectParameter(SemaRef,
Decl, E);
return;
}
SemaRef.MarkAnyDeclReferenced(Loc, D, MightBeOdrUse);
// If this is a call to a method via a cast, also mark the method in the
// derived class used in case codegen can devirtualize the call.
const MemberExpr *ME = dyn_cast<MemberExpr>(E);
if (!ME)
return;
CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ME->getMemberDecl());
if (!MD)
return;
// Only attempt to devirtualize if this is truly a virtual call.
bool IsVirtualCall = MD->isVirtual() &&
ME->performsVirtualDispatch(SemaRef.getLangOpts());
if (!IsVirtualCall)
return;
// If it's possible to devirtualize the call, mark the called function
// referenced.
CXXMethodDecl *DM = MD->getDevirtualizedMethod(
ME->getBase(), SemaRef.getLangOpts().AppleKext);
if (DM)
SemaRef.MarkAnyDeclReferenced(Loc, DM, MightBeOdrUse);
}
void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) {
// TODO: update this with DR# once a defect report is filed.
// C++11 defect. The address of a pure member should not be an ODR use, even
// if it's a qualified reference.
bool OdrUse = true;
if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(E->getDecl()))
if (Method->isVirtual() &&
!Method->getDevirtualizedMethod(Base, getLangOpts().AppleKext))
OdrUse = false;
if (auto *FD = dyn_cast<FunctionDecl>(E->getDecl())) {
if (!isUnevaluatedContext() && !isConstantEvaluatedContext() &&
!isImmediateFunctionContext() &&
!isCheckingDefaultArgumentOrInitializer() &&
FD->isImmediateFunction() && !RebuildingImmediateInvocation &&
!FD->isDependentContext())
ExprEvalContexts.back().ReferenceToConsteval.insert(E);
}
MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse,
RefsMinusAssignments);
}
void Sema::MarkMemberReferenced(MemberExpr *E) {
// C++11 [basic.def.odr]p2:
// A non-overloaded function whose name appears as a potentially-evaluated
// expression or a member of a set of candidate functions, if selected by
// overload resolution when referred to from a potentially-evaluated
// expression, is odr-used, unless it is a pure virtual function and its
// name is not explicitly qualified.
bool MightBeOdrUse = true;
if (E->performsVirtualDispatch(getLangOpts())) {
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(E->getMemberDecl()))
if (Method->isPureVirtual())
MightBeOdrUse = false;
}
SourceLocation Loc =
E->getMemberLoc().isValid() ? E->getMemberLoc() : E->getBeginLoc();
MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, MightBeOdrUse,
RefsMinusAssignments);
}
void Sema::MarkFunctionParmPackReferenced(FunctionParmPackExpr *E) {
for (VarDecl *VD : *E)
MarkExprReferenced(*this, E->getParameterPackLocation(), VD, E, true,
RefsMinusAssignments);
}
/// Perform marking for a reference to an arbitrary declaration. It
/// marks the declaration referenced, and performs odr-use checking for
/// functions and variables. This method should not be used when building a
/// normal expression which refers to a variable.
void Sema::MarkAnyDeclReferenced(SourceLocation Loc, Decl *D,
bool MightBeOdrUse) {
if (MightBeOdrUse) {
if (auto *VD = dyn_cast<VarDecl>(D)) {
MarkVariableReferenced(Loc, VD);
return;
}
}
if (auto *FD = dyn_cast<FunctionDecl>(D)) {
MarkFunctionReferenced(Loc, FD, MightBeOdrUse);
return;
}
D->setReferenced();
}
namespace {
// Mark all of the declarations used by a type as referenced.
// FIXME: Not fully implemented yet! We need to have a better understanding
// of when we're entering a context we should not recurse into.
// FIXME: This and EvaluatedExprMarker are more-or-less equivalent to
// TreeTransforms rebuilding the type in a new context. Rather than
// duplicating the TreeTransform logic, we should consider reusing it here.
// Currently that causes problems when rebuilding LambdaExprs.
class MarkReferencedDecls : public RecursiveASTVisitor<MarkReferencedDecls> {
Sema &S;
SourceLocation Loc;
public:
typedef RecursiveASTVisitor<MarkReferencedDecls> Inherited;
MarkReferencedDecls(Sema &S, SourceLocation Loc) : S(S), Loc(Loc) { }
bool TraverseTemplateArgument(const TemplateArgument &Arg);
};
}
bool MarkReferencedDecls::TraverseTemplateArgument(
const TemplateArgument &Arg) {
{
// A non-type template argument is a constant-evaluated context.
EnterExpressionEvaluationContext Evaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
if (Arg.getKind() == TemplateArgument::Declaration) {
if (Decl *D = Arg.getAsDecl())
S.MarkAnyDeclReferenced(Loc, D, true);
} else if (Arg.getKind() == TemplateArgument::Expression) {
S.MarkDeclarationsReferencedInExpr(Arg.getAsExpr(), false);
}
}
return Inherited::TraverseTemplateArgument(Arg);
}
void Sema::MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T) {
MarkReferencedDecls Marker(*this, Loc);
Marker.TraverseType(T);
}
namespace {
/// Helper class that marks all of the declarations referenced by
/// potentially-evaluated subexpressions as "referenced".
class EvaluatedExprMarker : public UsedDeclVisitor<EvaluatedExprMarker> {
public:
typedef UsedDeclVisitor<EvaluatedExprMarker> Inherited;
bool SkipLocalVariables;
ArrayRef<const Expr *> StopAt;
EvaluatedExprMarker(Sema &S, bool SkipLocalVariables,
ArrayRef<const Expr *> StopAt)
: Inherited(S), SkipLocalVariables(SkipLocalVariables), StopAt(StopAt) {}
void visitUsedDecl(SourceLocation Loc, Decl *D) {
S.MarkFunctionReferenced(Loc, cast<FunctionDecl>(D));
}
void Visit(Expr *E) {
if (llvm::is_contained(StopAt, E))
return;
Inherited::Visit(E);
}
void VisitConstantExpr(ConstantExpr *E) {
// Don't mark declarations within a ConstantExpression, as this expression
// will be evaluated and folded to a value.
}
void VisitDeclRefExpr(DeclRefExpr *E) {
// If we were asked not to visit local variables, don't.
if (SkipLocalVariables) {
if (VarDecl *VD = dyn_cast<VarDecl>(E->getDecl()))
if (VD->hasLocalStorage())
return;
}
// FIXME: This can trigger the instantiation of the initializer of a
// variable, which can cause the expression to become value-dependent
// or error-dependent. Do we need to propagate the new dependence bits?
S.MarkDeclRefReferenced(E);
}
void VisitMemberExpr(MemberExpr *E) {
S.MarkMemberReferenced(E);
Visit(E->getBase());
}
};
} // namespace
void Sema::MarkDeclarationsReferencedInExpr(Expr *E,
bool SkipLocalVariables,
ArrayRef<const Expr*> StopAt) {
EvaluatedExprMarker(*this, SkipLocalVariables, StopAt).Visit(E);
}
/// Emit a diagnostic when statements are reachable.
/// FIXME: check for reachability even in expressions for which we don't build a
/// CFG (e.g., in the initializer of a global or in a constant expression).
/// For example,
/// namespace { auto *p = new double[3][false ? (1, 2) : 3]; }
bool Sema::DiagIfReachable(SourceLocation Loc, ArrayRef<const Stmt *> Stmts,
const PartialDiagnostic &PD) {
if (!Stmts.empty() && getCurFunctionOrMethodDecl()) {
if (!FunctionScopes.empty())
FunctionScopes.back()->PossiblyUnreachableDiags.push_back(
sema::PossiblyUnreachableDiag(PD, Loc, Stmts));
return true;
}
// The initializer of a constexpr variable or of the first declaration of a
// static data member is not syntactically a constant-evaluated context,
// but nonetheless is always required to be a constant expression, so we
// can skip diagnosing.
// FIXME: Using the mangling context here is a hack.
if (auto *VD = dyn_cast_or_null<VarDecl>(
ExprEvalContexts.back().ManglingContextDecl)) {
if (VD->isConstexpr() ||
(VD->isStaticDataMember() && VD->isFirstDecl() && !VD->isInline()))
return false;
// FIXME: For any other kind of variable, we should build a CFG for its
// initializer and check whether the context in question is reachable.
}
Diag(Loc, PD);
return true;
}
/// Emit a diagnostic that describes an effect on the run-time behavior
/// of the program being compiled.
///
/// This routine emits the given diagnostic when the code currently being
/// type-checked is "potentially evaluated", meaning that there is a
/// possibility that the code will actually be executable. Code in sizeof()
/// expressions, code used only during overload resolution, etc., are not
/// potentially evaluated. This routine will suppress such diagnostics or,
/// in the absolutely nutty case of potentially potentially evaluated
/// expressions (C++ typeid), queue the diagnostic to potentially emit it
/// later.
///
/// This routine should be used for all diagnostics that describe the run-time
/// behavior of a program, such as passing a non-POD value through an ellipsis.
/// Failure to do so will likely result in spurious diagnostics or failures
/// during overload resolution or within sizeof/alignof/typeof/typeid.
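///
/// Illustrative example (not from the original source), using a hypothetical
/// variadic function 'variadic_fn' and a non-POD value 'np':
///   (void)sizeof(variadic_fn(np)); // unevaluated operand: no diagnostic
///   variadic_fn(np);               // potentially evaluated: diagnosed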
bool Sema::DiagRuntimeBehavior(SourceLocation Loc, ArrayRef<const Stmt*> Stmts,
const PartialDiagnostic &PD) {
if (ExprEvalContexts.back().isDiscardedStatementContext())
return false;
switch (ExprEvalContexts.back().Context) {
case ExpressionEvaluationContext::Unevaluated:
case ExpressionEvaluationContext::UnevaluatedList:
case ExpressionEvaluationContext::UnevaluatedAbstract:
case ExpressionEvaluationContext::DiscardedStatement:
// The argument will never be evaluated, so don't complain.
break;
case ExpressionEvaluationContext::ConstantEvaluated:
case ExpressionEvaluationContext::ImmediateFunctionContext:
// Relevant diagnostics should be produced by constant evaluation.
break;
case ExpressionEvaluationContext::PotentiallyEvaluated:
case ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
return DiagIfReachable(Loc, Stmts, PD);
}
return false;
}
bool Sema::DiagRuntimeBehavior(SourceLocation Loc, const Stmt *Statement,
const PartialDiagnostic &PD) {
return DiagRuntimeBehavior(
Loc, Statement ? llvm::ArrayRef(Statement) : std::nullopt, PD);
}
bool Sema::CheckCallReturnType(QualType ReturnType, SourceLocation Loc,
CallExpr *CE, FunctionDecl *FD) {
if (ReturnType->isVoidType() || !ReturnType->isIncompleteType())
return false;
// If we're inside a decltype's expression, don't check for a valid return
// type or construct temporaries until we know whether this is the last call.
if (ExprEvalContexts.back().ExprContext ==
ExpressionEvaluationContextRecord::EK_Decltype) {
ExprEvalContexts.back().DelayedDecltypeCalls.push_back(CE);
return false;
}
class CallReturnIncompleteDiagnoser : public TypeDiagnoser {
FunctionDecl *FD;
CallExpr *CE;
public:
CallReturnIncompleteDiagnoser(FunctionDecl *FD, CallExpr *CE)
: FD(FD), CE(CE) { }
void diagnose(Sema &S, SourceLocation Loc, QualType T) override {
if (!FD) {
S.Diag(Loc, diag::err_call_incomplete_return)
<< T << CE->getSourceRange();
return;
}
S.Diag(Loc, diag::err_call_function_incomplete_return)
<< CE->getSourceRange() << FD << T;
S.Diag(FD->getLocation(), diag::note_entity_declared_at)
<< FD->getDeclName();
}
} Diagnoser(FD, CE);
if (RequireCompleteType(Loc, ReturnType, Diagnoser))
return true;
return false;
}
// Diagnose the s/=/==/ and s/\|=/!=/ typos. Note that adding parentheses
// will prevent this condition from triggering, which is what we want.
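//
// Illustrative example (not from the original source):
//   if (x = 0) ...       // warns; fix-its offer '==' or silencing parens
//   if (flags |= 4) ...  // warns; the fix-it offers '!='
//   if ((x = 0)) ...     // extra parens: no warning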
void Sema::DiagnoseAssignmentAsCondition(Expr *E) {
SourceLocation Loc;
unsigned diagnostic = diag::warn_condition_is_assignment;
bool IsOrAssign = false;
if (BinaryOperator *Op = dyn_cast<BinaryOperator>(E)) {
if (Op->getOpcode() != BO_Assign && Op->getOpcode() != BO_OrAssign)
return;
IsOrAssign = Op->getOpcode() == BO_OrAssign;
// Greylist some idioms by putting them into a warning subcategory.
if (ObjCMessageExpr *ME
= dyn_cast<ObjCMessageExpr>(Op->getRHS()->IgnoreParenCasts())) {
Selector Sel = ME->getSelector();
// self = [<foo> init...]
if (ObjC().isSelfExpr(Op->getLHS()) && ME->getMethodFamily() == OMF_init)
diagnostic = diag::warn_condition_is_idiomatic_assignment;
// <foo> = [<bar> nextObject]
else if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "nextObject")
diagnostic = diag::warn_condition_is_idiomatic_assignment;
}
Loc = Op->getOperatorLoc();
} else if (CXXOperatorCallExpr *Op = dyn_cast<CXXOperatorCallExpr>(E)) {
if (Op->getOperator() != OO_Equal && Op->getOperator() != OO_PipeEqual)
return;
IsOrAssign = Op->getOperator() == OO_PipeEqual;
Loc = Op->getOperatorLoc();
} else if (PseudoObjectExpr *POE = dyn_cast<PseudoObjectExpr>(E))
return DiagnoseAssignmentAsCondition(POE->getSyntacticForm());
else {
// Not an assignment.
return;
}
Diag(Loc, diagnostic) << E->getSourceRange();
SourceLocation Open = E->getBeginLoc();
SourceLocation Close = getLocForEndOfToken(E->getSourceRange().getEnd());
Diag(Loc, diag::note_condition_assign_silence)
<< FixItHint::CreateInsertion(Open, "(")
<< FixItHint::CreateInsertion(Close, ")");
if (IsOrAssign)
Diag(Loc, diag::note_condition_or_assign_to_comparison)
<< FixItHint::CreateReplacement(Loc, "!=");
else
Diag(Loc, diag::note_condition_assign_to_comparison)
<< FixItHint::CreateReplacement(Loc, "==");
}
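// Diagnose a condition of the form '((x == y))'. Extra parentheses are the
// conventional way to silence the assignment-in-condition warning, so their
// presence around an equality comparison suggests the author may have meant
// '='. Illustrative example (not from the original source):
//   if ((result == compute())) ...  // warns; fix-its offer removing the
//                                   // parens or replacing '==' with '='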
void Sema::DiagnoseEqualityWithExtraParens(ParenExpr *ParenE) {
// Don't warn if the parens came from a macro.
SourceLocation parenLoc = ParenE->getBeginLoc();
if (parenLoc.isInvalid() || parenLoc.isMacroID())
return;
// Don't warn for dependent expressions.
if (ParenE->isTypeDependent())
return;
Expr *E = ParenE->IgnoreParens();
if (BinaryOperator *opE = dyn_cast<BinaryOperator>(E))
if (opE->getOpcode() == BO_EQ &&
opE->getLHS()->IgnoreParenImpCasts()->isModifiableLvalue(Context)
== Expr::MLV_Valid) {
SourceLocation Loc = opE->getOperatorLoc();
Diag(Loc, diag::warn_equality_with_extra_parens) << E->getSourceRange();
SourceRange ParenERange = ParenE->getSourceRange();
Diag(Loc, diag::note_equality_comparison_silence)
<< FixItHint::CreateRemoval(ParenERange.getBegin())
<< FixItHint::CreateRemoval(ParenERange.getEnd());
Diag(Loc, diag::note_equality_comparison_to_assign)
<< FixItHint::CreateReplacement(Loc, "=");
}
}
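// Illustrative example (not from the original source): in C,
//   struct S s;
//   if (s) ...  // error: statement requires an expression of scalar type
// whereas in C++ the check is delegated to CheckCXXBooleanCondition.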
ExprResult Sema::CheckBooleanCondition(SourceLocation Loc, Expr *E,
bool IsConstexpr) {
DiagnoseAssignmentAsCondition(E);
if (ParenExpr *parenE = dyn_cast<ParenExpr>(E))
DiagnoseEqualityWithExtraParens(parenE);
ExprResult result = CheckPlaceholderExpr(E);
if (result.isInvalid()) return ExprError();
E = result.get();
if (!E->isTypeDependent()) {
if (getLangOpts().CPlusPlus)
return CheckCXXBooleanCondition(E, IsConstexpr); // C++ 6.4p4
ExprResult ERes = DefaultFunctionArrayLvalueConversion(E);
if (ERes.isInvalid())
return ExprError();
E = ERes.get();
QualType T = E->getType();
if (!T->isScalarType()) { // C99 6.8.4.1p1
Diag(Loc, diag::err_typecheck_statement_requires_scalar)
<< T << E->getSourceRange();
return ExprError();
}
CheckBoolLikeConversion(E, Loc);
}
return E;
}
Sema::ConditionResult Sema::ActOnCondition(Scope *S, SourceLocation Loc,
Expr *SubExpr, ConditionKind CK,
bool MissingOK) {
// MissingOK indicates whether having no condition expression is valid
// (as in a for loop) or invalid (e.g. in a while loop).
if (!SubExpr)
return MissingOK ? ConditionResult() : ConditionError();
ExprResult Cond;
switch (CK) {
case ConditionKind::Boolean:
Cond = CheckBooleanCondition(Loc, SubExpr);
break;
case ConditionKind::ConstexprIf:
Cond = CheckBooleanCondition(Loc, SubExpr, true);
break;
case ConditionKind::Switch:
Cond = CheckSwitchCondition(Loc, SubExpr);
break;
}
if (Cond.isInvalid()) {
Cond = CreateRecoveryExpr(SubExpr->getBeginLoc(), SubExpr->getEndLoc(),
{SubExpr}, PreferredConditionType(CK));
if (!Cond.get())
return ConditionError();
}
// FIXME: FullExprArg doesn't have an invalid bit, so check nullness instead.
FullExprArg FullExpr = MakeFullExpr(Cond.get(), Loc);
if (!FullExpr.get())
return ConditionError();
return ConditionResult(*this, nullptr, FullExpr,
CK == ConditionKind::ConstexprIf);
}
namespace {
/// A visitor for rebuilding a call to an __unknown_any expression
/// to have an appropriate type.
struct RebuildUnknownAnyFunction
: StmtVisitor<RebuildUnknownAnyFunction, ExprResult> {
Sema &S;
RebuildUnknownAnyFunction(Sema &S) : S(S) {}
ExprResult VisitStmt(Stmt *S) {
llvm_unreachable("unexpected statement!");
}
ExprResult VisitExpr(Expr *E) {
S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_call)
<< E->getSourceRange();
return ExprError();
}
/// Rebuild an expression which simply semantically wraps another
/// expression, sharing its type and value kind.
template <class T> ExprResult rebuildSugarExpr(T *E) {
ExprResult SubResult = Visit(E->getSubExpr());
if (SubResult.isInvalid()) return ExprError();
Expr *SubExpr = SubResult.get();
E->setSubExpr(SubExpr);
E->setType(SubExpr->getType());
E->setValueKind(SubExpr->getValueKind());
assert(E->getObjectKind() == OK_Ordinary);
return E;
}
ExprResult VisitParenExpr(ParenExpr *E) {
return rebuildSugarExpr(E);
}
ExprResult VisitUnaryExtension(UnaryOperator *E) {
return rebuildSugarExpr(E);
}
ExprResult VisitUnaryAddrOf(UnaryOperator *E) {
ExprResult SubResult = Visit(E->getSubExpr());
if (SubResult.isInvalid()) return ExprError();
Expr *SubExpr = SubResult.get();
E->setSubExpr(SubExpr);
E->setType(S.Context.getPointerType(SubExpr->getType()));
assert(E->isPRValue());
assert(E->getObjectKind() == OK_Ordinary);
return E;
}
ExprResult resolveDecl(Expr *E, ValueDecl *VD) {
if (!isa<FunctionDecl>(VD)) return VisitExpr(E);
E->setType(VD->getType());
assert(E->isPRValue());
if (S.getLangOpts().CPlusPlus &&
!(isa<CXXMethodDecl>(VD) &&
cast<CXXMethodDecl>(VD)->isInstance()))
E->setValueKind(VK_LValue);
return E;
}
ExprResult VisitMemberExpr(MemberExpr *E) {
return resolveDecl(E, E->getMemberDecl());
}
ExprResult VisitDeclRefExpr(DeclRefExpr *E) {
return resolveDecl(E, E->getDecl());
}
};
}
/// Given a function expression of unknown-any type, try to rebuild it
/// to have a function type.
static ExprResult rebuildUnknownAnyFunction(Sema &S, Expr *FunctionExpr) {
ExprResult Result = RebuildUnknownAnyFunction(S).Visit(FunctionExpr);
if (Result.isInvalid()) return ExprError();
return S.DefaultFunctionArrayConversion(Result.get());
}
namespace {
/// A visitor for rebuilding an expression of type __unknown_anytype
/// into one which resolves the type directly on the referring
/// expression. Strict preservation of the original source
/// structure is not a goal.
struct RebuildUnknownAnyExpr
: StmtVisitor<RebuildUnknownAnyExpr, ExprResult> {
Sema &S;
/// The current destination type.
QualType DestType;
RebuildUnknownAnyExpr(Sema &S, QualType CastType)
: S(S), DestType(CastType) {}
ExprResult VisitStmt(Stmt *S) {
llvm_unreachable("unexpected statement!");
}
ExprResult VisitExpr(Expr *E) {
S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr)
<< E->getSourceRange();
return ExprError();
}
ExprResult VisitCallExpr(CallExpr *E);
ExprResult VisitObjCMessageExpr(ObjCMessageExpr *E);
/// Rebuild an expression which simply semantically wraps another
/// expression, sharing its type and value kind.
template <class T> ExprResult rebuildSugarExpr(T *E) {
ExprResult SubResult = Visit(E->getSubExpr());
if (SubResult.isInvalid()) return ExprError();
Expr *SubExpr = SubResult.get();
E->setSubExpr(SubExpr);
E->setType(SubExpr->getType());
E->setValueKind(SubExpr->getValueKind());
assert(E->getObjectKind() == OK_Ordinary);
return E;
}
ExprResult VisitParenExpr(ParenExpr *E) {
return rebuildSugarExpr(E);
}
ExprResult VisitUnaryExtension(UnaryOperator *E) {
return rebuildSugarExpr(E);
}
ExprResult VisitUnaryAddrOf(UnaryOperator *E) {
const PointerType *Ptr = DestType->getAs<PointerType>();
if (!Ptr) {
S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof)
<< E->getSourceRange();
return ExprError();
}
if (isa<CallExpr>(E->getSubExpr())) {
S.Diag(E->getOperatorLoc(), diag::err_unknown_any_addrof_call)
<< E->getSourceRange();
return ExprError();
}
assert(E->isPRValue());
assert(E->getObjectKind() == OK_Ordinary);
E->setType(DestType);
// Build the sub-expression as if it were an object of the pointee type.
DestType = Ptr->getPointeeType();
ExprResult SubResult = Visit(E->getSubExpr());
if (SubResult.isInvalid()) return ExprError();
E->setSubExpr(SubResult.get());
return E;
}
ExprResult VisitImplicitCastExpr(ImplicitCastExpr *E);
ExprResult resolveDecl(Expr *E, ValueDecl *VD);
ExprResult VisitMemberExpr(MemberExpr *E) {
return resolveDecl(E, E->getMemberDecl());
}
ExprResult VisitDeclRefExpr(DeclRefExpr *E) {
return resolveDecl(E, E->getDecl());
}
};
}
/// Rebuilds a call expression which yielded __unknown_anytype.
ExprResult RebuildUnknownAnyExpr::VisitCallExpr(CallExpr *E) {
Expr *CalleeExpr = E->getCallee();
enum FnKind {
FK_MemberFunction,
FK_FunctionPointer,
FK_BlockPointer
};
FnKind Kind;
QualType CalleeType = CalleeExpr->getType();
if (CalleeType == S.Context.BoundMemberTy) {
assert(isa<CXXMemberCallExpr>(E) || isa<CXXOperatorCallExpr>(E));
Kind = FK_MemberFunction;
CalleeType = Expr::findBoundMemberType(CalleeExpr);
} else if (const PointerType *Ptr = CalleeType->getAs<PointerType>()) {
CalleeType = Ptr->getPointeeType();
Kind = FK_FunctionPointer;
} else {
CalleeType = CalleeType->castAs<BlockPointerType>()->getPointeeType();
Kind = FK_BlockPointer;
}
const FunctionType *FnType = CalleeType->castAs<FunctionType>();
// Verify that this is a legal result type of a function.
if (DestType->isArrayType() || DestType->isFunctionType()) {
unsigned diagID = diag::err_func_returning_array_function;
if (Kind == FK_BlockPointer)
diagID = diag::err_block_returning_array_function;
S.Diag(E->getExprLoc(), diagID)
<< DestType->isFunctionType() << DestType;
return ExprError();
}
// Otherwise, go ahead and set DestType as the call's result.
E->setType(DestType.getNonLValueExprType(S.Context));
E->setValueKind(Expr::getValueKindForType(DestType));
assert(E->getObjectKind() == OK_Ordinary);
// Rebuild the function type, replacing the result type with DestType.
const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FnType);
if (Proto) {
// __unknown_anytype(...) is a special case used by the debugger when
// it has no idea what a function's signature is.
//
// We want to build this call essentially under the K&R
// unprototyped rules, but making a FunctionNoProtoType in C++
// would foul up all sorts of assumptions. However, we cannot
// simply pass all arguments as variadic arguments, nor can we
// portably just call the function under a non-variadic type; see
// the comment on IR-gen's TargetInfo::isNoProtoCallVariadic.
// However, it turns out that in practice it is generally safe to
// call a function declared as "A foo(B,C,D);" under the prototype
// "A foo(B,C,D,...);". The only known exception is with the
// Windows ABI, where any variadic function is implicitly cdecl
// regardless of its normal CC. Therefore we change the parameter
// types to match the types of the arguments.
//
// This is a hack, but it is far superior to moving the
// corresponding target-specific code from IR-gen to Sema/AST.
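//
// Illustrative example (not from the original source): given the debugger
// declaration
//   extern __unknown_anytype foo(...);
// a call rebuilt as '(double)foo(1, 2)' receives the function type
// 'double (int, int, ...)', with parameter types taken from the arguments.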
ArrayRef<QualType> ParamTypes = Proto->getParamTypes();
SmallVector<QualType, 8> ArgTypes;
if (ParamTypes.empty() && Proto->isVariadic()) { // the special case
ArgTypes.reserve(E->getNumArgs());
for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
ArgTypes.push_back(S.Context.getReferenceQualifiedType(E->getArg(i)));
}
ParamTypes = ArgTypes;
}
DestType = S.Context.getFunctionType(DestType, ParamTypes,
Proto->getExtProtoInfo());
} else {
DestType = S.Context.getFunctionNoProtoType(DestType,
FnType->getExtInfo());
}
// Rebuild the appropriate pointer-to-function type.
switch (Kind) {
case FK_MemberFunction:
// Nothing to do.
break;
case FK_FunctionPointer:
DestType = S.Context.getPointerType(DestType);
break;
case FK_BlockPointer:
DestType = S.Context.getBlockPointerType(DestType);
break;
}
// Finally, we can recurse.
ExprResult CalleeResult = Visit(CalleeExpr);
if (!CalleeResult.isUsable()) return ExprError();
E->setCallee(CalleeResult.get());
// Bind a temporary if necessary.
return S.MaybeBindToTemporary(E);
}
ExprResult RebuildUnknownAnyExpr::VisitObjCMessageExpr(ObjCMessageExpr *E) {
// Verify that this is a legal result type of a call.
if (DestType->isArrayType() || DestType->isFunctionType()) {
S.Diag(E->getExprLoc(), diag::err_func_returning_array_function)
<< DestType->isFunctionType() << DestType;
return ExprError();
}
// Rewrite the method result type if available.
if (ObjCMethodDecl *Method = E->getMethodDecl()) {
assert(Method->getReturnType() == S.Context.UnknownAnyTy);
Method->setReturnType(DestType);
}
// Change the type of the message.
E->setType(DestType.getNonReferenceType());
E->setValueKind(Expr::getValueKindForType(DestType));
return S.MaybeBindToTemporary(E);
}
ExprResult RebuildUnknownAnyExpr::VisitImplicitCastExpr(ImplicitCastExpr *E) {
// The only cases we should ever see here are a function-to-pointer decay
// and an lvalue-to-rvalue load of a block pointer.
if (E->getCastKind() == CK_FunctionToPointerDecay) {
assert(E->isPRValue());
assert(E->getObjectKind() == OK_Ordinary);
E->setType(DestType);
// Rebuild the sub-expression as the pointee (function) type.
DestType = DestType->castAs<PointerType>()->getPointeeType();
ExprResult Result = Visit(E->getSubExpr());
if (!Result.isUsable()) return ExprError();
E->setSubExpr(Result.get());
return E;
} else if (E->getCastKind() == CK_LValueToRValue) {
assert(E->isPRValue());
assert(E->getObjectKind() == OK_Ordinary);
assert(isa<BlockPointerType>(E->getType()));
E->setType(DestType);
// The sub-expression has to be an lvalue reference, so rebuild it as such.
DestType = S.Context.getLValueReferenceType(DestType);
ExprResult Result = Visit(E->getSubExpr());
if (!Result.isUsable()) return ExprError();
E->setSubExpr(Result.get());
return E;
} else {
llvm_unreachable("Unhandled cast type!");
}
}
ExprResult RebuildUnknownAnyExpr::resolveDecl(Expr *E, ValueDecl *VD) {
ExprValueKind ValueKind = VK_LValue;
QualType Type = DestType;
// We know how to make this work for certain kinds of decls:
// - functions
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(VD)) {
if (const PointerType *Ptr = Type->getAs<PointerType>()) {
DestType = Ptr->getPointeeType();
ExprResult Result = resolveDecl(E, VD);
if (Result.isInvalid()) return ExprError();
return S.ImpCastExprToType(Result.get(), Type, CK_FunctionToPointerDecay,
VK_PRValue);
}
if (!Type->isFunctionType()) {
S.Diag(E->getExprLoc(), diag::err_unknown_any_function)
<< VD << E->getSourceRange();
return ExprError();
}
if (const FunctionProtoType *FT = Type->getAs<FunctionProtoType>()) {
// We must match the FunctionDecl's type to the hack that
// RebuildUnknownAnyExpr::VisitCallExpr applies to vararg functions of
// unknown type. See the lengthy commentary in that routine.
QualType FDT = FD->getType();
const FunctionType *FnType = FDT->castAs<FunctionType>();
const FunctionProtoType *Proto = dyn_cast_or_null<FunctionProtoType>(FnType);
DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E);
if (DRE && Proto && Proto->getParamTypes().empty() && Proto->isVariadic()) {
SourceLocation Loc = FD->getLocation();
FunctionDecl *NewFD = FunctionDecl::Create(
S.Context, FD->getDeclContext(), Loc, Loc,
FD->getNameInfo().getName(), DestType, FD->getTypeSourceInfo(),
SC_None, S.getCurFPFeatures().isFPConstrained(),
false /*isInlineSpecified*/, FD->hasPrototype(),
/*ConstexprKind*/ ConstexprSpecKind::Unspecified);
if (FD->getQualifier())
NewFD->setQualifierInfo(FD->getQualifierLoc());
SmallVector<ParmVarDecl*, 16> Params;
for (const auto &AI : FT->param_types()) {
ParmVarDecl *Param =
S.BuildParmVarDeclForTypedef(FD, Loc, AI);
Param->setScopeInfo(0, Params.size());
Params.push_back(Param);
}
NewFD->setParams(Params);
DRE->setDecl(NewFD);
VD = DRE->getDecl();
}
}
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
if (MD->isInstance()) {
ValueKind = VK_PRValue;
Type = S.Context.BoundMemberTy;
}
// Function references aren't l-values in C.
if (!S.getLangOpts().CPlusPlus)
ValueKind = VK_PRValue;
// - variables
} else if (isa<VarDecl>(VD)) {
if (const ReferenceType *RefTy = Type->getAs<ReferenceType>()) {
Type = RefTy->getPointeeType();
} else if (Type->isFunctionType()) {
S.Diag(E->getExprLoc(), diag::err_unknown_any_var_function_type)
<< VD << E->getSourceRange();
return ExprError();
}
// - nothing else
} else {
S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_decl)
<< VD << E->getSourceRange();
return ExprError();
}
// Modifying the declaration like this is friendly to IR-gen but
// also really dangerous.
VD->setType(DestType);
E->setType(Type);
E->setValueKind(ValueKind);
return E;
}
ExprResult Sema::checkUnknownAnyCast(SourceRange TypeRange, QualType CastType,
Expr *CastExpr, CastKind &CastKind,
ExprValueKind &VK, CXXCastPath &Path) {
// The type we're casting to must be either void or complete.
if (!CastType->isVoidType() &&
RequireCompleteType(TypeRange.getBegin(), CastType,
diag::err_typecheck_cast_to_incomplete))
return ExprError();
// Rewrite the casted expression from scratch.
ExprResult result = RebuildUnknownAnyExpr(*this, CastType).Visit(CastExpr);
if (!result.isUsable()) return ExprError();
CastExpr = result.get();
VK = CastExpr->getValueKind();
CastKind = CK_NoOp;
return CastExpr;
}
ExprResult Sema::forceUnknownAnyToType(Expr *E, QualType ToType) {
return RebuildUnknownAnyExpr(*this, ToType).Visit(E);
}
ExprResult Sema::checkUnknownAnyArg(SourceLocation callLoc,
Expr *arg, QualType &paramType) {
// If the syntactic form of the argument is not an explicit cast of
// any sort, just do default argument promotion.
ExplicitCastExpr *castArg = dyn_cast<ExplicitCastExpr>(arg->IgnoreParens());
if (!castArg) {
ExprResult result = DefaultArgumentPromotion(arg);
if (result.isInvalid()) return ExprError();
paramType = result.get()->getType();
return result;
}
// Otherwise, use the type that was written in the explicit cast.
assert(!arg->hasPlaceholderType());
paramType = castArg->getTypeAsWritten();
// Copy-initialize a parameter of that type.
InitializedEntity entity =
InitializedEntity::InitializeParameter(Context, paramType,
/*consumed*/ false);
return PerformCopyInitialization(entity, callLoc, arg);
}
static ExprResult diagnoseUnknownAnyExpr(Sema &S, Expr *E) {
Expr *orig = E;
unsigned diagID = diag::err_uncasted_use_of_unknown_any;
while (true) {
E = E->IgnoreParenImpCasts();
if (CallExpr *call = dyn_cast<CallExpr>(E)) {
E = call->getCallee();
diagID = diag::err_uncasted_call_of_unknown_any;
} else {
break;
}
}
SourceLocation loc;
NamedDecl *d;
if (DeclRefExpr *ref = dyn_cast<DeclRefExpr>(E)) {
loc = ref->getLocation();
d = ref->getDecl();
} else if (MemberExpr *mem = dyn_cast<MemberExpr>(E)) {
loc = mem->getMemberLoc();
d = mem->getMemberDecl();
} else if (ObjCMessageExpr *msg = dyn_cast<ObjCMessageExpr>(E)) {
diagID = diag::err_uncasted_call_of_unknown_any;
loc = msg->getSelectorStartLoc();
d = msg->getMethodDecl();
if (!d) {
S.Diag(loc, diag::err_uncasted_send_to_unknown_any_method)
<< static_cast<unsigned>(msg->isClassMessage()) << msg->getSelector()
<< orig->getSourceRange();
return ExprError();
}
} else {
S.Diag(E->getExprLoc(), diag::err_unsupported_unknown_any_expr)
<< E->getSourceRange();
return ExprError();
}
S.Diag(loc, diagID) << d << orig->getSourceRange();
// Never recoverable.
return ExprError();
}
ExprResult Sema::CheckPlaceholderExpr(Expr *E) {
if (!Context.isDependenceAllowed()) {
// C cannot handle TypoExpr nodes on either side of a binop because it
// doesn't handle dependent types properly, so make sure any TypoExprs have
// been dealt with before checking the operands.
ExprResult Result = CorrectDelayedTyposInExpr(E);
if (!Result.isUsable()) return ExprError();
E = Result.get();
}
const BuiltinType *placeholderType = E->getType()->getAsPlaceholderType();
if (!placeholderType) return E;
switch (placeholderType->getKind()) {
case BuiltinType::UnresolvedTemplate: {
auto *ULE = cast<UnresolvedLookupExpr>(E);
const DeclarationNameInfo &NameInfo = ULE->getNameInfo();
// There's only one FoundDecl for UnresolvedTemplate type. See
// BuildTemplateIdExpr.
NamedDecl *Temp = *ULE->decls_begin();
const bool IsTypeAliasTemplateDecl = isa<TypeAliasTemplateDecl>(Temp);
if (NestedNameSpecifierLoc Loc = ULE->getQualifierLoc(); Loc.hasQualifier())
Diag(NameInfo.getLoc(), diag::err_template_kw_refers_to_type_template)
<< Loc.getNestedNameSpecifier() << NameInfo.getName().getAsString()
<< Loc.getSourceRange() << IsTypeAliasTemplateDecl;
else
Diag(NameInfo.getLoc(), diag::err_template_kw_refers_to_type_template)
<< "" << NameInfo.getName().getAsString() << ULE->getSourceRange()
<< IsTypeAliasTemplateDecl;
Diag(Temp->getLocation(), diag::note_referenced_type_template)
<< IsTypeAliasTemplateDecl;
return CreateRecoveryExpr(NameInfo.getBeginLoc(), NameInfo.getEndLoc(), {});
}
// Overloaded expressions.
case BuiltinType::Overload: {
// Try to resolve a single function template specialization.
// This is obligatory.
ExprResult Result = E;
if (ResolveAndFixSingleFunctionTemplateSpecialization(Result, false))
return Result;
// No guarantees that ResolveAndFixSingleFunctionTemplateSpecialization
// leaves Result unchanged on failure.
Result = E;
if (resolveAndFixAddressOfSingleOverloadCandidate(Result))
return Result;
// If that failed, try to recover with a call.
tryToRecoverWithCall(Result, PDiag(diag::err_ovl_unresolvable),
/*complain*/ true);
return Result;
}
// Bound member functions.
case BuiltinType::BoundMember: {
ExprResult result = E;
const Expr *BME = E->IgnoreParens();
PartialDiagnostic PD = PDiag(diag::err_bound_member_function);
// Try to give a nicer diagnostic if it is a bound member that we recognize.
if (isa<CXXPseudoDestructorExpr>(BME)) {
PD = PDiag(diag::err_dtor_expr_without_call) << /*pseudo-destructor*/ 1;
} else if (const auto *ME = dyn_cast<MemberExpr>(BME)) {
if (ME->getMemberNameInfo().getName().getNameKind() ==
DeclarationName::CXXDestructorName)
PD = PDiag(diag::err_dtor_expr_without_call) << /*destructor*/ 0;
}
tryToRecoverWithCall(result, PD,
/*complain*/ true);
return result;
}
// ARC unbridged casts.
case BuiltinType::ARCUnbridgedCast: {
Expr *realCast = ObjC().stripARCUnbridgedCast(E);
ObjC().diagnoseARCUnbridgedCast(realCast);
return realCast;
}
// Expressions of unknown type.
case BuiltinType::UnknownAny:
return diagnoseUnknownAnyExpr(*this, E);
// Pseudo-objects.
case BuiltinType::PseudoObject:
return PseudoObject().checkRValue(E);
case BuiltinType::BuiltinFn: {
// Accept __noop without parens by implicitly converting it to a call expr.
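// Illustrative example (not from the original source; MSVC extension):
//   __noop;  // accepted as if written '__noop()': a CallExpr of type int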
auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts());
if (DRE) {
auto *FD = cast<FunctionDecl>(DRE->getDecl());
unsigned BuiltinID = FD->getBuiltinID();
if (BuiltinID == Builtin::BI__noop) {
E = ImpCastExprToType(E, Context.getPointerType(FD->getType()),
CK_BuiltinFnToFnPtr)
.get();
return CallExpr::Create(Context, E, /*Args=*/{}, Context.IntTy,
VK_PRValue, SourceLocation(),
FPOptionsOverride());
}
if (Context.BuiltinInfo.isInStdNamespace(BuiltinID)) {
// Any use of these other than a direct call is ill-formed as of C++20,
// because they are not addressable functions. In earlier language
// modes, warn and force an instantiation of the real body.
Diag(E->getBeginLoc(),
getLangOpts().CPlusPlus20
? diag::err_use_of_unaddressable_function
: diag::warn_cxx20_compat_use_of_unaddressable_function);
if (FD->isImplicitlyInstantiable()) {
// Require a definition here because a normal attempt at
// instantiation for a builtin will be ignored, and we won't try
// again later. We assume that the definition of the template
// precedes this use.
InstantiateFunctionDefinition(E->getBeginLoc(), FD,
/*Recursive=*/false,
/*DefinitionRequired=*/true,
/*AtEndOfTU=*/false);
}
// Produce a properly-typed reference to the function.
CXXScopeSpec SS;
SS.Adopt(DRE->getQualifierLoc());
TemplateArgumentListInfo TemplateArgs;
DRE->copyTemplateArgumentsInto(TemplateArgs);
return BuildDeclRefExpr(
FD, FD->getType(), VK_LValue, DRE->getNameInfo(),
DRE->hasQualifier() ? &SS : nullptr, DRE->getFoundDecl(),
DRE->getTemplateKeywordLoc(),
DRE->hasExplicitTemplateArgs() ? &TemplateArgs : nullptr);
}
}
Diag(E->getBeginLoc(), diag::err_builtin_fn_use);
return ExprError();
}
case BuiltinType::IncompleteMatrixIdx:
Diag(cast<MatrixSubscriptExpr>(E->IgnoreParens())
->getRowIdx()
->getBeginLoc(),
diag::err_matrix_incomplete_index);
return ExprError();
// Array sections (OpenMP/OpenACC).
case BuiltinType::ArraySection:
Diag(E->getBeginLoc(), diag::err_array_section_use)
<< cast<ArraySectionExpr>(E)->isOMPArraySection();
return ExprError();
// OpenMP array-shaping expressions.
case BuiltinType::OMPArrayShaping:
return ExprError(Diag(E->getBeginLoc(), diag::err_omp_array_shaping_use));
case BuiltinType::OMPIterator:
return ExprError(Diag(E->getBeginLoc(), diag::err_omp_iterator_use));
// Everything else should be impossible.
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLExtensionTypes.def"
#define SVE_TYPE(Name, Id, SingletonId) \
case BuiltinType::Id:
#include "clang/Basic/AArch64SVEACLETypes.def"
#define PPC_VECTOR_TYPE(Name, Id, Size) \
case BuiltinType::Id:
#include "clang/Basic/PPCTypes.def"
#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/RISCVVTypes.def"
#define WASM_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/WebAssemblyReferenceTypes.def"
#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/AMDGPUTypes.def"
#define BUILTIN_TYPE(Id, SingletonId) case BuiltinType::Id:
#define PLACEHOLDER_TYPE(Id, SingletonId)
#include "clang/AST/BuiltinTypes.def"
break;
}
llvm_unreachable("invalid placeholder type!");
}
bool Sema::CheckCaseExpression(Expr *E) {
if (E->isTypeDependent())
return true;
if (E->isValueDependent() || E->isIntegerConstantExpr(Context))
return E->getType()->isIntegralOrEnumerationType();
return false;
}
ExprResult Sema::CreateRecoveryExpr(SourceLocation Begin, SourceLocation End,
ArrayRef<Expr *> SubExprs, QualType T) {
if (!Context.getLangOpts().RecoveryAST)
return ExprError();
if (isSFINAEContext())
return ExprError();
if (T.isNull() || T->isUndeducedType() ||
!Context.getLangOpts().RecoveryASTType)
// We don't know the concrete type; fall back to a dependent type.
T = Context.DependentTy;
return RecoveryExpr::Create(Context, T, Begin, End, SubExprs);
}
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaLambda.cpp b/contrib/llvm-project/clang/lib/Sema/SemaLambda.cpp
index 601077e9f333..809b94bb7412 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaLambda.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaLambda.cpp
@@ -1,2421 +1,2420 @@
//===--- SemaLambda.cpp - Semantic Analysis for C++11 Lambdas -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements semantic analysis for C++ lambda expressions.
//
//===----------------------------------------------------------------------===//
#include "clang/Sema/SemaLambda.h"
#include "TypeLocBuilder.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/Initialization.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaCUDA.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/SemaOpenMP.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/STLExtras.h"
#include <optional>
using namespace clang;
using namespace sema;
/// Examines the FunctionScopeInfo stack to determine the nearest
/// enclosing lambda (to the current lambda) that is 'capture-ready' for
/// the variable referenced in the current lambda (i.e. \p VarToCapture).
/// If successful, returns the index into Sema's FunctionScopeInfo stack
/// of the capture-ready lambda's LambdaScopeInfo.
///
/// Climbs down the stack of lambdas (deepest nested lambda - i.e. current
/// lambda - is on top) to determine the index of the nearest enclosing/outer
/// lambda that is ready to capture the \p VarToCapture being referenced in
/// the current lambda.
/// As we climb down the stack, we want the index of the first such lambda -
/// that is the lambda with the highest index that is 'capture-ready'.
///
/// A lambda 'L' is capture-ready for 'V' (var or this) if:
/// - its enclosing context is non-dependent
/// - and if the chain of lambdas between L and the lambda in which
/// V is potentially used (i.e. the lambda at the top of the scope info
/// stack), can all capture or have already captured V.
/// If \p VarToCapture is 'null' then we are trying to capture 'this'.
///
/// Note that a lambda that is deemed 'capture-ready' still needs to be checked
/// for whether it is 'capture-capable' (see
/// getStackIndexOfNearestEnclosingCaptureCapableLambda), before it can truly
/// capture.
///
/// \param FunctionScopes - Sema's stack of nested FunctionScopeInfo's (which a
/// LambdaScopeInfo inherits from). The current/deepest/innermost lambda
/// is at the top of the stack and has the highest index.
/// \param VarToCapture - the variable to capture. If NULL, capture 'this'.
///
/// \returns An std::optional<unsigned> Index that, if it evaluates to 'true',
/// contains the index (into Sema's FunctionScopeInfo stack) of the innermost
/// lambda which is capture-ready. If the return value evaluates to 'false'
/// then no lambda is capture-ready for \p VarToCapture.
static inline std::optional<unsigned>
getStackIndexOfNearestEnclosingCaptureReadyLambda(
ArrayRef<const clang::sema::FunctionScopeInfo *> FunctionScopes,
ValueDecl *VarToCapture) {
// Label failure to capture.
const std::optional<unsigned> NoLambdaIsCaptureReady;
// Ignore all inner captured regions.
unsigned CurScopeIndex = FunctionScopes.size() - 1;
while (CurScopeIndex > 0 && isa<clang::sema::CapturedRegionScopeInfo>(
FunctionScopes[CurScopeIndex]))
--CurScopeIndex;
assert(
isa<clang::sema::LambdaScopeInfo>(FunctionScopes[CurScopeIndex]) &&
"The function on the top of sema's function-info stack must be a lambda");
// If VarToCapture is null, we are attempting to capture 'this'.
const bool IsCapturingThis = !VarToCapture;
const bool IsCapturingVariable = !IsCapturingThis;
// Start with the current lambda at the top of the stack (highest index).
DeclContext *EnclosingDC =
cast<sema::LambdaScopeInfo>(FunctionScopes[CurScopeIndex])->CallOperator;
do {
const clang::sema::LambdaScopeInfo *LSI =
cast<sema::LambdaScopeInfo>(FunctionScopes[CurScopeIndex]);
// If we have climbed down to an intervening enclosing lambda that contains
// the variable declaration, it obviously cannot (and must not) capture the
// variable.
// Since its enclosing DC is dependent, all the lambdas between it and the
// innermost nested lambda are dependent (otherwise we wouldn't have
// arrived here) - so we don't yet have a lambda that can capture the
// variable.
if (IsCapturingVariable &&
VarToCapture->getDeclContext()->Equals(EnclosingDC))
return NoLambdaIsCaptureReady;
// For an enclosing lambda to be capture-ready for an entity, all
// intervening lambdas have to be able to capture that entity. If even
// one of the intervening lambdas is not capable of capturing the entity,
// then no enclosing lambda can ever capture that entity.
// For example:
// const int x = 10;
// [=](auto a) { #1
// [](auto b) { #2 <-- an intervening lambda that can never capture 'x'
// [=](auto c) { #3
// f(x, c); <-- cannot lead to x's speculative capture by #1 or #2
// }; }; };
// If they do not have a default implicit capture, check to see
// if the entity has already been explicitly captured.
// If even a single dependent enclosing lambda lacks the capability
// to ever capture this variable, there is no further enclosing
// non-dependent lambda that can capture this variable.
if (LSI->ImpCaptureStyle == sema::LambdaScopeInfo::ImpCap_None) {
if (IsCapturingVariable && !LSI->isCaptured(VarToCapture))
return NoLambdaIsCaptureReady;
if (IsCapturingThis && !LSI->isCXXThisCaptured())
return NoLambdaIsCaptureReady;
}
EnclosingDC = getLambdaAwareParentOfDeclContext(EnclosingDC);
assert(CurScopeIndex);
--CurScopeIndex;
} while (!EnclosingDC->isTranslationUnit() &&
EnclosingDC->isDependentContext() &&
isLambdaCallOperator(EnclosingDC));
assert(CurScopeIndex < (FunctionScopes.size() - 1));
// If the enclosingDC is not dependent, then the immediately nested lambda
// (one index above) is capture-ready.
if (!EnclosingDC->isDependentContext())
return CurScopeIndex + 1;
return NoLambdaIsCaptureReady;
}
/// Examines the FunctionScopeInfo stack to determine the nearest
/// enclosing lambda (to the current lambda) that is 'capture-capable' for
/// the variable referenced in the current lambda (i.e. \p VarToCapture).
/// If successful, returns the index into Sema's FunctionScopeInfo stack
/// of the capture-capable lambda's LambdaScopeInfo.
///
/// Given the current stack of lambdas being processed by Sema and
/// the variable of interest, to identify the nearest enclosing lambda (to the
/// current lambda at the top of the stack) that can truly capture
/// a variable, it has to have the following two properties:
/// a) 'capture-ready' - be the innermost lambda that is 'capture-ready':
/// - climb down the stack (i.e. starting from the innermost and examining
/// each outer lambda step by step) checking if each enclosing
/// lambda can either implicitly or explicitly capture the variable.
/// Record the first such lambda that is enclosed in a non-dependent
/// context. If no such lambda currently exists return failure.
/// b) 'capture-capable' - make sure the 'capture-ready' lambda can truly
/// capture the variable by checking all its enclosing lambdas:
/// - check if all outer lambdas enclosing the 'capture-ready' lambda
/// identified above in 'a' can also capture the variable (this is done
/// via tryCaptureVariable for variables and CheckCXXThisCapture for
/// 'this' by passing in the index of the Lambda identified in step 'a')
///
/// \param FunctionScopes - Sema's stack of nested FunctionScopeInfo's (which a
/// LambdaScopeInfo inherits from). The current/deepest/innermost lambda
/// is at the top of the stack.
///
/// \param VarToCapture - the variable to capture. If NULL, capture 'this'.
///
///
/// \returns An std::optional<unsigned> Index that, if it evaluates to 'true',
/// contains the index (into Sema's FunctionScopeInfo stack) of the innermost
/// lambda which is capture-capable. If the return value evaluates to 'false'
/// then no lambda is capture-capable for \p VarToCapture.
std::optional<unsigned>
clang::getStackIndexOfNearestEnclosingCaptureCapableLambda(
ArrayRef<const sema::FunctionScopeInfo *> FunctionScopes,
ValueDecl *VarToCapture, Sema &S) {
const std::optional<unsigned> NoLambdaIsCaptureCapable;
const std::optional<unsigned> OptionalStackIndex =
getStackIndexOfNearestEnclosingCaptureReadyLambda(FunctionScopes,
VarToCapture);
if (!OptionalStackIndex)
return NoLambdaIsCaptureCapable;
const unsigned IndexOfCaptureReadyLambda = *OptionalStackIndex;
assert(((IndexOfCaptureReadyLambda != (FunctionScopes.size() - 1)) ||
S.getCurGenericLambda()) &&
"The capture ready lambda for a potential capture can only be the "
"current lambda if it is a generic lambda");
const sema::LambdaScopeInfo *const CaptureReadyLambdaLSI =
cast<sema::LambdaScopeInfo>(FunctionScopes[IndexOfCaptureReadyLambda]);
// If VarToCapture is null, we are attempting to capture 'this'
const bool IsCapturingThis = !VarToCapture;
const bool IsCapturingVariable = !IsCapturingThis;
if (IsCapturingVariable) {
// Check if the capture-ready lambda can truly capture the variable, by
// checking whether all enclosing lambdas of the capture-ready lambda allow
// the capture - i.e. make sure it is capture-capable.
QualType CaptureType, DeclRefType;
const bool CanCaptureVariable =
!S.tryCaptureVariable(VarToCapture,
/*ExprVarIsUsedInLoc*/ SourceLocation(),
clang::Sema::TryCapture_Implicit,
/*EllipsisLoc*/ SourceLocation(),
/*BuildAndDiagnose*/ false, CaptureType,
DeclRefType, &IndexOfCaptureReadyLambda);
if (!CanCaptureVariable)
return NoLambdaIsCaptureCapable;
} else {
// Check if the capture-ready lambda can truly capture 'this' by checking
// whether all enclosing lambdas of the capture-ready lambda can capture
// 'this'.
const bool CanCaptureThis =
!S.CheckCXXThisCapture(
CaptureReadyLambdaLSI->PotentialThisCaptureLocation,
/*Explicit*/ false, /*BuildAndDiagnose*/ false,
&IndexOfCaptureReadyLambda);
if (!CanCaptureThis)
return NoLambdaIsCaptureCapable;
}
return IndexOfCaptureReadyLambda;
}
static inline TemplateParameterList *
getGenericLambdaTemplateParameterList(LambdaScopeInfo *LSI, Sema &SemaRef) {
if (!LSI->GLTemplateParameterList && !LSI->TemplateParams.empty()) {
LSI->GLTemplateParameterList = TemplateParameterList::Create(
SemaRef.Context,
/*Template kw loc*/ SourceLocation(),
/*L angle loc*/ LSI->ExplicitTemplateParamsRange.getBegin(),
LSI->TemplateParams,
/*R angle loc*/LSI->ExplicitTemplateParamsRange.getEnd(),
LSI->RequiresClause.get());
}
return LSI->GLTemplateParameterList;
}
CXXRecordDecl *
Sema::createLambdaClosureType(SourceRange IntroducerRange, TypeSourceInfo *Info,
unsigned LambdaDependencyKind,
LambdaCaptureDefault CaptureDefault) {
DeclContext *DC = CurContext;
while (!(DC->isFunctionOrMethod() || DC->isRecord() || DC->isFileContext()))
DC = DC->getParent();
bool IsGenericLambda =
Info && getGenericLambdaTemplateParameterList(getCurLambda(), *this);
// Start constructing the lambda class.
CXXRecordDecl *Class = CXXRecordDecl::CreateLambda(
Context, DC, Info, IntroducerRange.getBegin(), LambdaDependencyKind,
IsGenericLambda, CaptureDefault);
DC->addDecl(Class);
return Class;
}
/// Determine whether the given context is or is enclosed in an inline
/// function.
static bool isInInlineFunction(const DeclContext *DC) {
while (!DC->isFileContext()) {
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DC))
if (FD->isInlined())
return true;
DC = DC->getLexicalParent();
}
return false;
}
std::tuple<MangleNumberingContext *, Decl *>
Sema::getCurrentMangleNumberContext(const DeclContext *DC) {
// Compute the context for allocating mangling numbers in the current
// expression, if the ABI requires them.
Decl *ManglingContextDecl = ExprEvalContexts.back().ManglingContextDecl;
enum ContextKind {
Normal,
DefaultArgument,
DataMember,
InlineVariable,
TemplatedVariable,
Concept
} Kind = Normal;
bool IsInNonspecializedTemplate =
inTemplateInstantiation() || CurContext->isDependentContext();
// Default arguments of member function parameters that appear in a class
// definition, as well as the initializers of data members, receive special
// treatment. Identify them.
if (ManglingContextDecl) {
if (ParmVarDecl *Param = dyn_cast<ParmVarDecl>(ManglingContextDecl)) {
if (const DeclContext *LexicalDC
= Param->getDeclContext()->getLexicalParent())
if (LexicalDC->isRecord())
Kind = DefaultArgument;
} else if (VarDecl *Var = dyn_cast<VarDecl>(ManglingContextDecl)) {
if (Var->getMostRecentDecl()->isInline())
Kind = InlineVariable;
else if (Var->getDeclContext()->isRecord() && IsInNonspecializedTemplate)
Kind = TemplatedVariable;
else if (Var->getDescribedVarTemplate())
Kind = TemplatedVariable;
else if (auto *VTS = dyn_cast<VarTemplateSpecializationDecl>(Var)) {
if (!VTS->isExplicitSpecialization())
Kind = TemplatedVariable;
}
} else if (isa<FieldDecl>(ManglingContextDecl)) {
Kind = DataMember;
} else if (isa<ImplicitConceptSpecializationDecl>(ManglingContextDecl)) {
Kind = Concept;
}
}
// Itanium ABI [5.1.7]:
// In the following contexts [...] the one-definition rule requires closure
// types in different translation units to "correspond":
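// Illustrative example (not from the original source): if two translation
// units both contain 'inline void f() { auto l = []{}; }', the closure
// types of 'l' must correspond across the units, so the lambda takes its
// mangling number from the enclosing inline function's context.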
switch (Kind) {
case Normal: {
// -- the bodies of inline or templated functions
if ((IsInNonspecializedTemplate &&
!(ManglingContextDecl && isa<ParmVarDecl>(ManglingContextDecl))) ||
isInInlineFunction(CurContext)) {
while (auto *CD = dyn_cast<CapturedDecl>(DC))
DC = CD->getParent();
return std::make_tuple(&Context.getManglingNumberContext(DC), nullptr);
}
return std::make_tuple(nullptr, nullptr);
}
case Concept:
// Concept definitions aren't code-generated and thus aren't mangled;
// however, the ManglingContextDecl is important for the purposes of
// re-forming the template argument list of the lambda for constraint
// evaluation.
case DataMember:
// -- default member initializers
case DefaultArgument:
// -- default arguments appearing in class definitions
case InlineVariable:
case TemplatedVariable:
// -- the initializers of inline or templated variables
return std::make_tuple(
&Context.getManglingNumberContext(ASTContext::NeedExtraManglingDecl,
ManglingContextDecl),
ManglingContextDecl);
}
llvm_unreachable("unexpected context");
}
static QualType
buildTypeForLambdaCallOperator(Sema &S, clang::CXXRecordDecl *Class,
TemplateParameterList *TemplateParams,
TypeSourceInfo *MethodTypeInfo) {
assert(MethodTypeInfo && "expected a non null type");
QualType MethodType = MethodTypeInfo->getType();
// If a lambda appears in a dependent context or is a generic lambda (has
// template parameters) and has an 'auto' return type, deduce it to a
// dependent type.
if (Class->isDependentContext() || TemplateParams) {
const FunctionProtoType *FPT = MethodType->castAs<FunctionProtoType>();
QualType Result = FPT->getReturnType();
if (Result->isUndeducedType()) {
Result = S.SubstAutoTypeDependent(Result);
MethodType = S.Context.getFunctionType(Result, FPT->getParamTypes(),
FPT->getExtProtoInfo());
}
}
return MethodType;
}
// [C++2b] [expr.prim.lambda.closure] p4
// Given a lambda with a lambda-capture, the type of the explicit object
// parameter, if any, of the lambda's function call operator (possibly
// instantiated from a function call operator template) shall be either:
// - the closure type,
// - class type publicly and unambiguously derived from the closure type, or
// - a reference to a possibly cv-qualified such type.
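// Illustrative example (not from the original source):
//   auto ok = [x = 42](this auto &&self) { return x; }; // closure type: OK
//   struct Unrelated {};
//   auto bad = [x = 42](this Unrelated) { return 0; };  // ill-formed:
//       'Unrelated' is neither the closure type nor derived from it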
bool Sema::DiagnoseInvalidExplicitObjectParameterInLambda(
CXXMethodDecl *Method, SourceLocation CallLoc) {
if (!isLambdaCallWithExplicitObjectParameter(Method))
return false;
CXXRecordDecl *RD = Method->getParent();
if (Method->getType()->isDependentType())
return false;
if (RD->isCapturelessLambda())
return false;
ParmVarDecl *Param = Method->getParamDecl(0);
QualType ExplicitObjectParameterType = Param->getType()
.getNonReferenceType()
.getUnqualifiedType()
.getDesugaredType(getASTContext());
QualType LambdaType = getASTContext().getRecordType(RD);
if (LambdaType == ExplicitObjectParameterType)
return false;
// Don't check the same instantiation twice.
//
// If this call operator is ill-formed, there is no point in issuing
// a diagnostic every time it is called because the problem is in the
// definition of the derived type, not at the call site.
//
// FIXME: Move this check to where we instantiate the method? This should
// be possible, but the naive approach of just marking the method as invalid
// leads to us emitting more diagnostics than we should have to for this case
// (1 error here *and* 1 error about there being no matching overload at the
// call site). It might be possible to avoid that by also checking if there
// is an empty cast path for the method stored in the context (signalling that
// we've already diagnosed it) and then just not building the call, but that
// doesn't really seem any simpler than diagnosing it at the call site...
if (auto It = Context.LambdaCastPaths.find(Method);
It != Context.LambdaCastPaths.end())
return It->second.empty();
CXXCastPath &Path = Context.LambdaCastPaths[Method];
CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true,
/*DetectVirtual=*/false);
if (!IsDerivedFrom(RD->getLocation(), ExplicitObjectParameterType, LambdaType,
Paths)) {
Diag(Param->getLocation(), diag::err_invalid_explicit_object_type_in_lambda)
<< ExplicitObjectParameterType;
return true;
}
if (Paths.isAmbiguous(LambdaType->getCanonicalTypeUnqualified())) {
std::string PathsDisplay = getAmbiguousPathsDisplayString(Paths);
Diag(CallLoc, diag::err_explicit_object_lambda_ambiguous_base)
<< LambdaType << PathsDisplay;
return true;
}
if (CheckBaseClassAccess(CallLoc, LambdaType, ExplicitObjectParameterType,
Paths.front(),
diag::err_explicit_object_lambda_inaccessible_base))
return true;
BuildBasePathArray(Paths, Path);
return false;
}
void Sema::handleLambdaNumbering(
CXXRecordDecl *Class, CXXMethodDecl *Method,
std::optional<CXXRecordDecl::LambdaNumbering> NumberingOverride) {
if (NumberingOverride) {
Class->setLambdaNumbering(*NumberingOverride);
return;
}
ContextRAII ManglingContext(*this, Class->getDeclContext());
auto getMangleNumberingContext =
[this](CXXRecordDecl *Class,
Decl *ManglingContextDecl) -> MangleNumberingContext * {
// Get mangle numbering context if there's any extra decl context.
if (ManglingContextDecl)
return &Context.getManglingNumberContext(
ASTContext::NeedExtraManglingDecl, ManglingContextDecl);
// Otherwise, from that lambda's decl context.
auto DC = Class->getDeclContext();
while (auto *CD = dyn_cast<CapturedDecl>(DC))
DC = CD->getParent();
return &Context.getManglingNumberContext(DC);
};
CXXRecordDecl::LambdaNumbering Numbering;
MangleNumberingContext *MCtx;
std::tie(MCtx, Numbering.ContextDecl) =
getCurrentMangleNumberContext(Class->getDeclContext());
if (!MCtx && (getLangOpts().CUDA || getLangOpts().SYCLIsDevice ||
getLangOpts().SYCLIsHost)) {
// Force lambda numbering in CUDA/HIP, as we need to name lambdas following
// the ODR. Device and host compilation need consistent naming of kernel
// functions. As lambdas are potentially part of these `__global__` function
// names, they need numbering that follows the ODR.
// Also force for SYCL, since we need this for the
// __builtin_sycl_unique_stable_name implementation, which depends on lambda
// mangling.
MCtx = getMangleNumberingContext(Class, Numbering.ContextDecl);
assert(MCtx && "Retrieving mangle numbering context failed!");
Numbering.HasKnownInternalLinkage = true;
}
if (MCtx) {
Numbering.IndexInContext = MCtx->getNextLambdaIndex();
Numbering.ManglingNumber = MCtx->getManglingNumber(Method);
Numbering.DeviceManglingNumber = MCtx->getDeviceManglingNumber(Method);
Class->setLambdaNumbering(Numbering);
if (auto *Source =
dyn_cast_or_null<ExternalSemaSource>(Context.getExternalSource()))
Source->AssignedLambdaNumbering(Class);
}
}
static void buildLambdaScopeReturnType(Sema &S, LambdaScopeInfo *LSI,
CXXMethodDecl *CallOperator,
bool ExplicitResultType) {
if (ExplicitResultType) {
LSI->HasImplicitReturnType = false;
LSI->ReturnType = CallOperator->getReturnType();
if (!LSI->ReturnType->isDependentType() && !LSI->ReturnType->isVoidType())
S.RequireCompleteType(CallOperator->getBeginLoc(), LSI->ReturnType,
diag::err_lambda_incomplete_result);
} else {
LSI->HasImplicitReturnType = true;
}
}
void Sema::buildLambdaScope(LambdaScopeInfo *LSI, CXXMethodDecl *CallOperator,
SourceRange IntroducerRange,
LambdaCaptureDefault CaptureDefault,
SourceLocation CaptureDefaultLoc,
bool ExplicitParams, bool Mutable) {
LSI->CallOperator = CallOperator;
CXXRecordDecl *LambdaClass = CallOperator->getParent();
LSI->Lambda = LambdaClass;
if (CaptureDefault == LCD_ByCopy)
LSI->ImpCaptureStyle = LambdaScopeInfo::ImpCap_LambdaByval;
else if (CaptureDefault == LCD_ByRef)
LSI->ImpCaptureStyle = LambdaScopeInfo::ImpCap_LambdaByref;
LSI->CaptureDefaultLoc = CaptureDefaultLoc;
LSI->IntroducerRange = IntroducerRange;
LSI->ExplicitParams = ExplicitParams;
LSI->Mutable = Mutable;
}
void Sema::finishLambdaExplicitCaptures(LambdaScopeInfo *LSI) {
LSI->finishedExplicitCaptures();
}
void Sema::ActOnLambdaExplicitTemplateParameterList(
LambdaIntroducer &Intro, SourceLocation LAngleLoc,
ArrayRef<NamedDecl *> TParams, SourceLocation RAngleLoc,
ExprResult RequiresClause) {
LambdaScopeInfo *LSI = getCurLambda();
assert(LSI && "Expected a lambda scope");
assert(LSI->NumExplicitTemplateParams == 0 &&
"Already acted on explicit template parameters");
assert(LSI->TemplateParams.empty() &&
"Explicit template parameters should come "
"before invented (auto) ones");
assert(!TParams.empty() &&
"No template parameters to act on");
LSI->TemplateParams.append(TParams.begin(), TParams.end());
LSI->NumExplicitTemplateParams = TParams.size();
LSI->ExplicitTemplateParamsRange = {LAngleLoc, RAngleLoc};
LSI->RequiresClause = RequiresClause;
}
/// If this expression is an enumerator-like expression of some type
/// T, return the type T; otherwise, return null.
///
/// Pointer comparisons on the result here should always work because
/// it's derived from either the parent of an EnumConstantDecl
/// (i.e. the definition) or the declaration returned by
/// EnumType::getDecl() (i.e. the definition).
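///
/// Illustrative example (not from the original source): given
///   enum Color { Red, Green };
///   ^{ if (cond) return Red; return Green; }
/// every returned expression is enumerator-like of type Color, so Color
/// is the type found.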
static EnumDecl *findEnumForBlockReturn(Expr *E) {
// An expression is an enumerator-like expression of type T if,
// ignoring parens and parens-like expressions:
E = E->IgnoreParens();
// - it is an enumerator whose enum type is T or
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
if (EnumConstantDecl *D
= dyn_cast<EnumConstantDecl>(DRE->getDecl())) {
return cast<EnumDecl>(D->getDeclContext());
}
return nullptr;
}
// - it is a comma expression whose RHS is an enumerator-like
// expression of type T or
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) {
if (BO->getOpcode() == BO_Comma)
return findEnumForBlockReturn(BO->getRHS());
return nullptr;
}
// - it is a statement-expression whose value expression is an
// enumerator-like expression of type T or
if (StmtExpr *SE = dyn_cast<StmtExpr>(E)) {
if (Expr *last = dyn_cast_or_null<Expr>(SE->getSubStmt()->body_back()))
return findEnumForBlockReturn(last);
return nullptr;
}
// - it is a ternary conditional operator (not the GNU ?:
// extension) whose second and third operands are
// enumerator-like expressions of type T or
if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) {
if (EnumDecl *ED = findEnumForBlockReturn(CO->getTrueExpr()))
if (ED == findEnumForBlockReturn(CO->getFalseExpr()))
return ED;
return nullptr;
}
// (implicitly:)
// - it is an implicit integral conversion applied to an
// enumerator-like expression of type T or
if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) {
// We can sometimes see integral conversions in valid
// enumerator-like expressions.
if (ICE->getCastKind() == CK_IntegralCast)
return findEnumForBlockReturn(ICE->getSubExpr());
// Otherwise, just rely on the type.
}
// - it is an expression of that formal enum type.
if (const EnumType *ET = E->getType()->getAs<EnumType>()) {
return ET->getDecl();
}
// Otherwise, nope.
return nullptr;
}
/// Attempt to find a type T for which the returned expression of the
/// given statement is an enumerator-like expression of that type.
static EnumDecl *findEnumForBlockReturn(ReturnStmt *ret) {
if (Expr *retValue = ret->getRetValue())
return findEnumForBlockReturn(retValue);
return nullptr;
}
/// Attempt to find a common type T for which all of the returned
/// expressions in a block are enumerator-like expressions of that
/// type.
static EnumDecl *findCommonEnumForBlockReturns(ArrayRef<ReturnStmt*> returns) {
ArrayRef<ReturnStmt*>::iterator i = returns.begin(), e = returns.end();
// Try to find one for the first return.
EnumDecl *ED = findEnumForBlockReturn(*i);
if (!ED) return nullptr;
// Check that the rest of the returns have the same enum.
for (++i; i != e; ++i) {
if (findEnumForBlockReturn(*i) != ED)
return nullptr;
}
// Never infer an anonymous enum type.
if (!ED->hasNameForLinkage()) return nullptr;
return ED;
}
/// Adjust the given return statements so that they formally return
/// the given type. It should require, at most, an IntegralCast.
static void adjustBlockReturnsToEnum(Sema &S, ArrayRef<ReturnStmt*> returns,
QualType returnType) {
for (ArrayRef<ReturnStmt*>::iterator
i = returns.begin(), e = returns.end(); i != e; ++i) {
ReturnStmt *ret = *i;
Expr *retValue = ret->getRetValue();
if (S.Context.hasSameType(retValue->getType(), returnType))
continue;
// Right now we only support integral fixup casts.
assert(returnType->isIntegralOrUnscopedEnumerationType());
assert(retValue->getType()->isIntegralOrUnscopedEnumerationType());
ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(retValue);
Expr *E = (cleanups ? cleanups->getSubExpr() : retValue);
E = ImplicitCastExpr::Create(S.Context, returnType, CK_IntegralCast, E,
/*base path*/ nullptr, VK_PRValue,
FPOptionsOverride());
if (cleanups) {
cleanups->setSubExpr(E);
} else {
ret->setRetValue(E);
}
}
}
void Sema::deduceClosureReturnType(CapturingScopeInfo &CSI) {
assert(CSI.HasImplicitReturnType);
// If it was ever a placeholder, it had to have been deduced to DependentTy.
assert(CSI.ReturnType.isNull() || !CSI.ReturnType->isUndeducedType());
assert((!isa<LambdaScopeInfo>(CSI) || !getLangOpts().CPlusPlus14) &&
"lambda expressions use auto deduction in C++14 onwards");
// C++ core issue 975:
// If a lambda-expression does not include a trailing-return-type,
// it is as if the trailing-return-type denotes the following type:
// - if there are no return statements in the compound-statement,
// or all return statements return either an expression of type
// void or no expression or braced-init-list, the type void;
// - otherwise, if all return statements return an expression
// and the types of the returned expressions after
// lvalue-to-rvalue conversion (4.1 [conv.lval]),
// array-to-pointer conversion (4.2 [conv.array]), and
// function-to-pointer conversion (4.3 [conv.func]) are the
// same, that common type;
// - otherwise, the program is ill-formed.
//
// C++ core issue 1048 additionally removes top-level cv-qualifiers
// from the types of returned expressions to match the C++14 auto
// deduction rules.
//
// In addition, in blocks in non-C++ modes, if all of the return
// statements are enumerator-like expressions of some type T, where
// T has a name for linkage, then we infer the return type of the
// block to be that type.
// First case: no return statements, implicit void return type.
ASTContext &Ctx = getASTContext();
if (CSI.Returns.empty()) {
// It's possible there were simply no /valid/ return statements.
// In this case, the first one we found may have at least given us a type.
if (CSI.ReturnType.isNull())
CSI.ReturnType = Ctx.VoidTy;
return;
}
// Second case: at least one return statement has dependent type.
// Delay type checking until instantiation.
assert(!CSI.ReturnType.isNull() && "We should have a tentative return type.");
if (CSI.ReturnType->isDependentType())
return;
// Try to apply the enum-fuzz rule.
if (!getLangOpts().CPlusPlus) {
assert(isa<BlockScopeInfo>(CSI));
const EnumDecl *ED = findCommonEnumForBlockReturns(CSI.Returns);
if (ED) {
CSI.ReturnType = Context.getTypeDeclType(ED);
adjustBlockReturnsToEnum(*this, CSI.Returns, CSI.ReturnType);
return;
}
}
// Third case: only one return statement. Don't bother doing extra work!
if (CSI.Returns.size() == 1)
return;
// General case: many return statements.
// Check that they all have compatible return types.
// We require the return types to strictly match here.
// Note that we've already done the required promotions as part of
// processing the return statement.
for (const ReturnStmt *RS : CSI.Returns) {
const Expr *RetE = RS->getRetValue();
QualType ReturnType =
(RetE ? RetE->getType() : Context.VoidTy).getUnqualifiedType();
if (Context.getCanonicalFunctionResultType(ReturnType) ==
Context.getCanonicalFunctionResultType(CSI.ReturnType)) {
// Use the return type with the strictest possible nullability annotation.
auto RetTyNullability = ReturnType->getNullability();
auto BlockNullability = CSI.ReturnType->getNullability();
if (BlockNullability &&
(!RetTyNullability ||
hasWeakerNullability(*RetTyNullability, *BlockNullability)))
CSI.ReturnType = ReturnType;
continue;
}
// FIXME: This is a poor diagnostic for ReturnStmts without expressions.
// TODO: It's possible that the *first* return is the divergent one.
Diag(RS->getBeginLoc(),
diag::err_typecheck_missing_return_type_incompatible)
<< ReturnType << CSI.ReturnType << isa<LambdaScopeInfo>(CSI);
// Continue iterating so that we keep emitting diagnostics.
}
}
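// By example, under C++11 deduction (C++14 lambdas use 'auto' deduction
// instead, per the assertion above), a sketch such as
//   auto f = [](bool b) { if (b) return 1; return 2.0; };
// is diagnosed here because 'int' and 'double' do not strictly match.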
QualType Sema::buildLambdaInitCaptureInitialization(
SourceLocation Loc, bool ByRef, SourceLocation EllipsisLoc,
std::optional<unsigned> NumExpansions, IdentifierInfo *Id,
bool IsDirectInit, Expr *&Init) {
// Create an 'auto' or 'auto&' TypeSourceInfo that we can use to
// deduce against.
QualType DeductType = Context.getAutoDeductType();
TypeLocBuilder TLB;
AutoTypeLoc TL = TLB.push<AutoTypeLoc>(DeductType);
TL.setNameLoc(Loc);
if (ByRef) {
DeductType = BuildReferenceType(DeductType, true, Loc, Id);
assert(!DeductType.isNull() && "can't build reference to auto");
TLB.push<ReferenceTypeLoc>(DeductType).setSigilLoc(Loc);
}
if (EllipsisLoc.isValid()) {
if (Init->containsUnexpandedParameterPack()) {
Diag(EllipsisLoc, getLangOpts().CPlusPlus20
? diag::warn_cxx17_compat_init_capture_pack
: diag::ext_init_capture_pack);
DeductType = Context.getPackExpansionType(DeductType, NumExpansions,
/*ExpectPackInType=*/false);
TLB.push<PackExpansionTypeLoc>(DeductType).setEllipsisLoc(EllipsisLoc);
} else {
// Just ignore the ellipsis for now and form a non-pack variable. We'll
// diagnose this later when we try to capture it.
}
}
TypeSourceInfo *TSI = TLB.getTypeSourceInfo(Context, DeductType);
// Deduce the type of the init capture.
QualType DeducedType = deduceVarTypeFromInitializer(
/*VarDecl*/nullptr, DeclarationName(Id), DeductType, TSI,
SourceRange(Loc, Loc), IsDirectInit, Init);
if (DeducedType.isNull())
return QualType();
// Are we a non-list direct initialization?
ParenListExpr *CXXDirectInit = dyn_cast<ParenListExpr>(Init);
// Perform initialization analysis and ensure any implicit conversions
// (such as lvalue-to-rvalue) are enforced.
InitializedEntity Entity =
InitializedEntity::InitializeLambdaCapture(Id, DeducedType, Loc);
InitializationKind Kind =
IsDirectInit
? (CXXDirectInit ? InitializationKind::CreateDirect(
Loc, Init->getBeginLoc(), Init->getEndLoc())
: InitializationKind::CreateDirectList(Loc))
: InitializationKind::CreateCopy(Loc, Init->getBeginLoc());
MultiExprArg Args = Init;
if (CXXDirectInit)
Args =
MultiExprArg(CXXDirectInit->getExprs(), CXXDirectInit->getNumExprs());
QualType DclT;
InitializationSequence InitSeq(*this, Entity, Kind, Args);
ExprResult Result = InitSeq.Perform(*this, Entity, Kind, Args, &DclT);
if (Result.isInvalid())
return QualType();
Init = Result.getAs<Expr>();
return DeducedType;
}
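// A few init-capture forms this deduction handles, as a sketch ('x' and
// 'pack' stand in for an in-scope variable and a function parameter pack):
//   [x = 42] {};       // DeductType 'auto'; deduces int
//   [&r = x] {};       // DeductType 'auto &'; r binds to x
//   [...xs = pack] {}; // C++20 init-capture pack expansion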
VarDecl *Sema::createLambdaInitCaptureVarDecl(
SourceLocation Loc, QualType InitCaptureType, SourceLocation EllipsisLoc,
IdentifierInfo *Id, unsigned InitStyle, Expr *Init, DeclContext *DeclCtx) {
// FIXME: Retain the TypeSourceInfo from buildLambdaInitCaptureInitialization
// rather than reconstructing it here.
TypeSourceInfo *TSI = Context.getTrivialTypeSourceInfo(InitCaptureType, Loc);
if (auto PETL = TSI->getTypeLoc().getAs<PackExpansionTypeLoc>())
PETL.setEllipsisLoc(EllipsisLoc);
// Create a dummy variable representing the init-capture. This is not actually
// used as a variable, and only exists as a way to name and refer to the
// init-capture.
// FIXME: Pass in separate source locations for '&' and identifier.
VarDecl *NewVD = VarDecl::Create(Context, DeclCtx, Loc, Loc, Id,
InitCaptureType, TSI, SC_Auto);
NewVD->setInitCapture(true);
NewVD->setReferenced(true);
// FIXME: Pass in a VarDecl::InitializationStyle.
NewVD->setInitStyle(static_cast<VarDecl::InitializationStyle>(InitStyle));
NewVD->markUsed(Context);
NewVD->setInit(Init);
if (NewVD->isParameterPack())
getCurLambda()->LocalPacks.push_back(NewVD);
return NewVD;
}
void Sema::addInitCapture(LambdaScopeInfo *LSI, VarDecl *Var, bool ByRef) {
assert(Var->isInitCapture() && "init capture flag should be set");
LSI->addCapture(Var, /*isBlock=*/false, ByRef,
/*isNested=*/false, Var->getLocation(), SourceLocation(),
Var->getType(), /*Invalid=*/false);
}
// Unlike getCurLambda, getCurrentLambdaScopeUnsafe doesn't
// check that the current lambda is in a consistent or fully constructed state.
static LambdaScopeInfo *getCurrentLambdaScopeUnsafe(Sema &S) {
assert(!S.FunctionScopes.empty());
return cast<LambdaScopeInfo>(S.FunctionScopes[S.FunctionScopes.size() - 1]);
}
static TypeSourceInfo *
getDummyLambdaType(Sema &S, SourceLocation Loc = SourceLocation()) {
// C++11 [expr.prim.lambda]p4:
// If a lambda-expression does not include a lambda-declarator, it is as
// if the lambda-declarator were ().
FunctionProtoType::ExtProtoInfo EPI(S.Context.getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true));
EPI.HasTrailingReturn = true;
EPI.TypeQuals.addConst();
LangAS AS = S.getDefaultCXXMethodAddrSpace();
if (AS != LangAS::Default)
EPI.TypeQuals.addAddressSpace(AS);
// C++1y [expr.prim.lambda]:
// The lambda return type is 'auto', which is replaced by the
// trailing-return type if provided and/or deduced from 'return'
// statements
// We don't do this before C++1y, because we don't support deduced return
// types there.
QualType DefaultTypeForNoTrailingReturn = S.getLangOpts().CPlusPlus14
? S.Context.getAutoDeductType()
: S.Context.DependentTy;
QualType MethodTy = S.Context.getFunctionType(DefaultTypeForNoTrailingReturn,
std::nullopt, EPI);
return S.Context.getTrivialTypeSourceInfo(MethodTy, Loc);
}
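// E.g. '[]{ return 42; }' is treated as '[]() { return 42; }', with a call
// operator of roughly type 'auto () const' in C++14 and later (a dependent
// result type in C++11, which lacks deduced return types).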
static TypeSourceInfo *getLambdaType(Sema &S, LambdaIntroducer &Intro,
Declarator &ParamInfo, Scope *CurScope,
SourceLocation Loc,
bool &ExplicitResultType) {
ExplicitResultType = false;
assert(
(ParamInfo.getDeclSpec().getStorageClassSpec() ==
DeclSpec::SCS_unspecified ||
ParamInfo.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static) &&
"Unexpected storage specifier");
bool IsLambdaStatic =
ParamInfo.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static;
TypeSourceInfo *MethodTyInfo;
if (ParamInfo.getNumTypeObjects() == 0) {
MethodTyInfo = getDummyLambdaType(S, Loc);
} else {
// Check explicit parameters
S.CheckExplicitObjectLambda(ParamInfo);
DeclaratorChunk::FunctionTypeInfo &FTI = ParamInfo.getFunctionTypeInfo();
bool HasExplicitObjectParameter =
ParamInfo.isExplicitObjectMemberFunction();
ExplicitResultType = FTI.hasTrailingReturnType();
if (!FTI.hasMutableQualifier() && !IsLambdaStatic &&
!HasExplicitObjectParameter)
FTI.getOrCreateMethodQualifiers().SetTypeQual(DeclSpec::TQ_const, Loc);
if (ExplicitResultType && S.getLangOpts().HLSL) {
QualType RetTy = FTI.getTrailingReturnType().get();
if (!RetTy.isNull()) {
// HLSL does not support specifying an address space on a lambda return
// type.
LangAS AddressSpace = RetTy.getAddressSpace();
if (AddressSpace != LangAS::Default)
S.Diag(FTI.getTrailingReturnTypeLoc(),
diag::err_return_value_with_address_space);
}
}
MethodTyInfo = S.GetTypeForDeclarator(ParamInfo);
assert(MethodTyInfo && "no type from lambda-declarator");
// Check for unexpanded parameter packs in the method type.
if (MethodTyInfo->getType()->containsUnexpandedParameterPack())
S.DiagnoseUnexpandedParameterPack(Intro.Range.getBegin(), MethodTyInfo,
S.UPPC_DeclarationType);
}
return MethodTyInfo;
}
CXXMethodDecl *Sema::CreateLambdaCallOperator(SourceRange IntroducerRange,
CXXRecordDecl *Class) {
// C++20 [expr.prim.lambda.closure]p3:
// The closure type for a lambda-expression has a public inline function
// call operator (for a non-generic lambda) or function call operator
// template (for a generic lambda) whose parameters and return type are
// described by the lambda-expression's parameter-declaration-clause
// and trailing-return-type respectively.
DeclarationName MethodName =
Context.DeclarationNames.getCXXOperatorName(OO_Call);
DeclarationNameLoc MethodNameLoc =
DeclarationNameLoc::makeCXXOperatorNameLoc(IntroducerRange.getBegin());
CXXMethodDecl *Method = CXXMethodDecl::Create(
Context, Class, SourceLocation(),
DeclarationNameInfo(MethodName, IntroducerRange.getBegin(),
MethodNameLoc),
QualType(), /*Tinfo=*/nullptr, SC_None,
getCurFPFeatures().isFPConstrained(),
/*isInline=*/true, ConstexprSpecKind::Unspecified, SourceLocation(),
/*TrailingRequiresClause=*/nullptr);
Method->setAccess(AS_public);
return Method;
}
void Sema::AddTemplateParametersToLambdaCallOperator(
CXXMethodDecl *CallOperator, CXXRecordDecl *Class,
TemplateParameterList *TemplateParams) {
assert(TemplateParams && "no template parameters");
FunctionTemplateDecl *TemplateMethod = FunctionTemplateDecl::Create(
Context, Class, CallOperator->getLocation(), CallOperator->getDeclName(),
TemplateParams, CallOperator);
TemplateMethod->setAccess(AS_public);
CallOperator->setDescribedFunctionTemplate(TemplateMethod);
}
void Sema::CompleteLambdaCallOperator(
CXXMethodDecl *Method, SourceLocation LambdaLoc,
SourceLocation CallOperatorLoc, Expr *TrailingRequiresClause,
TypeSourceInfo *MethodTyInfo, ConstexprSpecKind ConstexprKind,
StorageClass SC, ArrayRef<ParmVarDecl *> Params,
bool HasExplicitResultType) {
LambdaScopeInfo *LSI = getCurrentLambdaScopeUnsafe(*this);
if (TrailingRequiresClause)
Method->setTrailingRequiresClause(TrailingRequiresClause);
TemplateParameterList *TemplateParams =
getGenericLambdaTemplateParameterList(LSI, *this);
DeclContext *DC = Method->getLexicalDeclContext();
Method->setLexicalDeclContext(LSI->Lambda);
if (TemplateParams) {
FunctionTemplateDecl *TemplateMethod =
Method->getDescribedFunctionTemplate();
assert(TemplateMethod &&
"AddTemplateParametersToLambdaCallOperator should have been called");
LSI->Lambda->addDecl(TemplateMethod);
TemplateMethod->setLexicalDeclContext(DC);
} else {
LSI->Lambda->addDecl(Method);
}
LSI->Lambda->setLambdaIsGeneric(TemplateParams);
LSI->Lambda->setLambdaTypeInfo(MethodTyInfo);
Method->setLexicalDeclContext(DC);
Method->setLocation(LambdaLoc);
Method->setInnerLocStart(CallOperatorLoc);
Method->setTypeSourceInfo(MethodTyInfo);
Method->setType(buildTypeForLambdaCallOperator(*this, LSI->Lambda,
TemplateParams, MethodTyInfo));
Method->setConstexprKind(ConstexprKind);
Method->setStorageClass(SC);
if (!Params.empty()) {
CheckParmsForFunctionDef(Params, /*CheckParameterNames=*/false);
Method->setParams(Params);
for (auto P : Method->parameters()) {
assert(P && "null in a parameter list");
P->setOwningFunction(Method);
}
}
buildLambdaScopeReturnType(*this, LSI, Method, HasExplicitResultType);
}
void Sema::ActOnLambdaExpressionAfterIntroducer(LambdaIntroducer &Intro,
Scope *CurrentScope) {
LambdaScopeInfo *LSI = getCurLambda();
assert(LSI && "LambdaScopeInfo should be on stack!");
if (Intro.Default == LCD_ByCopy)
LSI->ImpCaptureStyle = LambdaScopeInfo::ImpCap_LambdaByval;
else if (Intro.Default == LCD_ByRef)
LSI->ImpCaptureStyle = LambdaScopeInfo::ImpCap_LambdaByref;
LSI->CaptureDefaultLoc = Intro.DefaultLoc;
LSI->IntroducerRange = Intro.Range;
LSI->AfterParameterList = false;
assert(LSI->NumExplicitTemplateParams == 0);
// Determine if we're within a context where we know that the lambda will
// be dependent, because there are template parameters in scope.
CXXRecordDecl::LambdaDependencyKind LambdaDependencyKind =
CXXRecordDecl::LDK_Unknown;
if (CurScope->getTemplateParamParent() != nullptr) {
LambdaDependencyKind = CXXRecordDecl::LDK_AlwaysDependent;
} else if (Scope *P = CurScope->getParent()) {
// Given a lambda defined inside a requires expression,
//
// struct S {
// S(auto var) requires requires { [&] -> decltype(var) { }; }
// {}
// };
//
// The parameter var is not injected into the function Decl at the point of
// parsing the lambda. In such scenarios, perceiving it as dependent could
// result in the constraint being evaluated, which matches what GCC does.
while (P->getEntity() && P->getEntity()->isRequiresExprBody())
P = P->getParent();
if (P->isFunctionDeclarationScope() &&
llvm::any_of(P->decls(), [](Decl *D) {
return isa<ParmVarDecl>(D) &&
cast<ParmVarDecl>(D)->getType()->isTemplateTypeParmType();
}))
LambdaDependencyKind = CXXRecordDecl::LDK_AlwaysDependent;
}
CXXRecordDecl *Class = createLambdaClosureType(
Intro.Range, /*Info=*/nullptr, LambdaDependencyKind, Intro.Default);
LSI->Lambda = Class;
CXXMethodDecl *Method = CreateLambdaCallOperator(Intro.Range, Class);
LSI->CallOperator = Method;
Method->setLexicalDeclContext(CurContext);
PushDeclContext(CurScope, Method);
bool ContainsUnexpandedParameterPack = false;
// Distinct capture names, for diagnostics.
llvm::DenseMap<IdentifierInfo *, ValueDecl *> CaptureNames;
// Handle explicit captures.
SourceLocation PrevCaptureLoc =
Intro.Default == LCD_None ? Intro.Range.getBegin() : Intro.DefaultLoc;
for (auto C = Intro.Captures.begin(), E = Intro.Captures.end(); C != E;
PrevCaptureLoc = C->Loc, ++C) {
if (C->Kind == LCK_This || C->Kind == LCK_StarThis) {
if (C->Kind == LCK_StarThis)
Diag(C->Loc, !getLangOpts().CPlusPlus17
? diag::ext_star_this_lambda_capture_cxx17
: diag::warn_cxx14_compat_star_this_lambda_capture);
// C++11 [expr.prim.lambda]p8:
// An identifier or this shall not appear more than once in a
// lambda-capture.
if (LSI->isCXXThisCaptured()) {
Diag(C->Loc, diag::err_capture_more_than_once)
<< "'this'" << SourceRange(LSI->getCXXThisCapture().getLocation())
<< FixItHint::CreateRemoval(
SourceRange(getLocForEndOfToken(PrevCaptureLoc), C->Loc));
continue;
}
// C++20 [expr.prim.lambda]p8:
// If a lambda-capture includes a capture-default that is =,
// each simple-capture of that lambda-capture shall be of the form
// "&identifier", "this", or "* this". [ Note: The form [&,this] is
// redundant but accepted for compatibility with ISO C++14. --end note ]
if (Intro.Default == LCD_ByCopy && C->Kind != LCK_StarThis)
Diag(C->Loc, !getLangOpts().CPlusPlus20
? diag::ext_equals_this_lambda_capture_cxx20
: diag::warn_cxx17_compat_equals_this_lambda_capture);
// C++11 [expr.prim.lambda]p12:
// If this is captured by a local lambda expression, its nearest
// enclosing function shall be a non-static member function.
QualType ThisCaptureType = getCurrentThisType();
if (ThisCaptureType.isNull()) {
Diag(C->Loc, diag::err_this_capture) << true;
continue;
}
CheckCXXThisCapture(C->Loc, /*Explicit=*/true, /*BuildAndDiagnose*/ true,
/*FunctionScopeIndexToStopAtPtr*/ nullptr,
C->Kind == LCK_StarThis);
if (!LSI->Captures.empty())
LSI->ExplicitCaptureRanges[LSI->Captures.size() - 1] = C->ExplicitRange;
continue;
}
assert(C->Id && "missing identifier for capture");
if (C->Init.isInvalid())
continue;
ValueDecl *Var = nullptr;
if (C->Init.isUsable()) {
Diag(C->Loc, getLangOpts().CPlusPlus14
? diag::warn_cxx11_compat_init_capture
: diag::ext_init_capture);
// If the initializer expression is usable, but the InitCaptureType
// is not, then an error has occurred, so ignore the capture for now.
// E.g., [n{0}] { }; <-- when no <initializer_list> has been included.
// FIXME: we should create the init capture variable and mark it invalid
// in this case.
if (C->InitCaptureType.get().isNull())
continue;
if (C->Init.get()->containsUnexpandedParameterPack() &&
!C->InitCaptureType.get()->getAs<PackExpansionType>())
DiagnoseUnexpandedParameterPack(C->Init.get(), UPPC_Initializer);
unsigned InitStyle;
switch (C->InitKind) {
case LambdaCaptureInitKind::NoInit:
llvm_unreachable("not an init-capture?");
case LambdaCaptureInitKind::CopyInit:
InitStyle = VarDecl::CInit;
break;
case LambdaCaptureInitKind::DirectInit:
InitStyle = VarDecl::CallInit;
break;
case LambdaCaptureInitKind::ListInit:
InitStyle = VarDecl::ListInit;
break;
}
Var = createLambdaInitCaptureVarDecl(C->Loc, C->InitCaptureType.get(),
C->EllipsisLoc, C->Id, InitStyle,
C->Init.get(), Method);
assert(Var && "createLambdaInitCaptureVarDecl returned a null VarDecl?");
if (auto *V = dyn_cast<VarDecl>(Var))
CheckShadow(CurrentScope, V);
PushOnScopeChains(Var, CurrentScope, false);
} else {
assert(C->InitKind == LambdaCaptureInitKind::NoInit &&
"init capture has valid but null init?");
// C++11 [expr.prim.lambda]p8:
// If a lambda-capture includes a capture-default that is &, the
// identifiers in the lambda-capture shall not be preceded by &.
// If a lambda-capture includes a capture-default that is =, [...]
// each identifier it contains shall be preceded by &.
if (C->Kind == LCK_ByRef && Intro.Default == LCD_ByRef) {
Diag(C->Loc, diag::err_reference_capture_with_reference_default)
<< FixItHint::CreateRemoval(
SourceRange(getLocForEndOfToken(PrevCaptureLoc), C->Loc));
continue;
} else if (C->Kind == LCK_ByCopy && Intro.Default == LCD_ByCopy) {
Diag(C->Loc, diag::err_copy_capture_with_copy_default)
<< FixItHint::CreateRemoval(
SourceRange(getLocForEndOfToken(PrevCaptureLoc), C->Loc));
continue;
}
// C++11 [expr.prim.lambda]p10:
// The identifiers in a capture-list are looked up using the usual
// rules for unqualified name lookup (3.4.1)
DeclarationNameInfo Name(C->Id, C->Loc);
LookupResult R(*this, Name, LookupOrdinaryName);
LookupName(R, CurScope);
if (R.isAmbiguous())
continue;
if (R.empty()) {
// FIXME: Disable corrections that would add qualification?
CXXScopeSpec ScopeSpec;
DeclFilterCCC<VarDecl> Validator{};
if (DiagnoseEmptyLookup(CurScope, ScopeSpec, R, Validator))
continue;
}
if (auto *BD = R.getAsSingle<BindingDecl>())
Var = BD;
else if (R.getAsSingle<FieldDecl>()) {
Diag(C->Loc, diag::err_capture_class_member_does_not_name_variable)
<< C->Id;
continue;
} else
Var = R.getAsSingle<VarDecl>();
if (Var && DiagnoseUseOfDecl(Var, C->Loc))
continue;
}
// C++11 [expr.prim.lambda]p10:
// [...] each such lookup shall find a variable with automatic storage
// duration declared in the reaching scope of the local lambda expression.
// Note that the 'reaching scope' check happens in tryCaptureVariable().
if (!Var) {
Diag(C->Loc, diag::err_capture_does_not_name_variable) << C->Id;
continue;
}
// C++11 [expr.prim.lambda]p8:
// An identifier or this shall not appear more than once in a
// lambda-capture.
if (auto [It, Inserted] = CaptureNames.insert(std::pair{C->Id, Var});
!Inserted) {
if (C->InitKind == LambdaCaptureInitKind::NoInit &&
!Var->isInitCapture()) {
Diag(C->Loc, diag::err_capture_more_than_once)
<< C->Id << It->second->getBeginLoc()
<< FixItHint::CreateRemoval(
SourceRange(getLocForEndOfToken(PrevCaptureLoc), C->Loc));
Var->setInvalidDecl();
} else if (Var && Var->isPlaceholderVar(getLangOpts())) {
DiagPlaceholderVariableDefinition(C->Loc);
} else {
// Previous capture captured something different (one or both was
// an init-capture): no fixit.
Diag(C->Loc, diag::err_capture_more_than_once) << C->Id;
continue;
}
}
// Ignore invalid decls; they'll just confuse the code later.
if (Var->isInvalidDecl())
continue;
VarDecl *Underlying = Var->getPotentiallyDecomposedVarDecl();
if (!Underlying->hasLocalStorage()) {
Diag(C->Loc, diag::err_capture_non_automatic_variable) << C->Id;
Diag(Var->getLocation(), diag::note_previous_decl) << C->Id;
continue;
}
// C++11 [expr.prim.lambda]p23:
// A capture followed by an ellipsis is a pack expansion (14.5.3).
SourceLocation EllipsisLoc;
if (C->EllipsisLoc.isValid()) {
if (Var->isParameterPack()) {
EllipsisLoc = C->EllipsisLoc;
} else {
Diag(C->EllipsisLoc, diag::err_pack_expansion_without_parameter_packs)
<< (C->Init.isUsable() ? C->Init.get()->getSourceRange()
: SourceRange(C->Loc));
// Just ignore the ellipsis.
}
} else if (Var->isParameterPack()) {
ContainsUnexpandedParameterPack = true;
}
if (C->Init.isUsable()) {
addInitCapture(LSI, cast<VarDecl>(Var), C->Kind == LCK_ByRef);
- PushOnScopeChains(Var, CurScope, false);
} else {
TryCaptureKind Kind = C->Kind == LCK_ByRef ? TryCapture_ExplicitByRef
: TryCapture_ExplicitByVal;
tryCaptureVariable(Var, C->Loc, Kind, EllipsisLoc);
}
if (!LSI->Captures.empty())
LSI->ExplicitCaptureRanges[LSI->Captures.size() - 1] = C->ExplicitRange;
}
finishLambdaExplicitCaptures(LSI);
LSI->ContainsUnexpandedParameterPack |= ContainsUnexpandedParameterPack;
PopDeclContext();
}
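// A few of the introducer diagnostics above, by example:
//   [this, this] {}; // error: 'this' appears more than once
//   [=, this] {};    // extension before C++20, accepted from C++20 on
//   [&, &x] {};      // error: '&x' is redundant with the '&' default
//   [=, x] {};       // error: 'x' is redundant with the '=' default
//   [x, x] {};       // error: 'x' appears more than once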
void Sema::ActOnLambdaClosureQualifiers(LambdaIntroducer &Intro,
SourceLocation MutableLoc) {
LambdaScopeInfo *LSI = getCurrentLambdaScopeUnsafe(*this);
LSI->Mutable = MutableLoc.isValid();
ContextRAII Context(*this, LSI->CallOperator, /*NewThisContext*/ false);
// C++11 [expr.prim.lambda]p9:
// A lambda-expression whose smallest enclosing scope is a block scope is a
// local lambda expression; any other lambda expression shall not have a
// capture-default or simple-capture in its lambda-introducer.
//
// For simple-captures, this is covered by the check below that any named
// entity is a variable that can be captured.
//
// For DR1632, we also allow a capture-default in any context where we can
// odr-use 'this' (in particular, in a default initializer for a non-static
// data member).
if (Intro.Default != LCD_None &&
!LSI->Lambda->getParent()->isFunctionOrMethod() &&
(getCurrentThisType().isNull() ||
CheckCXXThisCapture(SourceLocation(), /*Explicit=*/true,
/*BuildAndDiagnose=*/false)))
Diag(Intro.DefaultLoc, diag::err_capture_default_non_local);
}
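// For example, a capture-default is rejected at namespace scope but allowed
// in a default member initializer, where 'this' is odr-usable (DR1632):
//   int n = [=] { return 0; }();               // error: non-local
//   struct S { int m = [=] { return 0; }(); }; // OK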
void Sema::ActOnLambdaClosureParameters(
Scope *LambdaScope, MutableArrayRef<DeclaratorChunk::ParamInfo> Params) {
LambdaScopeInfo *LSI = getCurrentLambdaScopeUnsafe(*this);
PushDeclContext(LambdaScope, LSI->CallOperator);
for (const DeclaratorChunk::ParamInfo &P : Params) {
auto *Param = cast<ParmVarDecl>(P.Param);
Param->setOwningFunction(LSI->CallOperator);
if (Param->getIdentifier())
PushOnScopeChains(Param, LambdaScope, false);
}
// After the parameter list, we may parse a noexcept/requires/trailing return
// type, which needs to know whether the call operator constitutes a dependent
// context, so we need to set up the FunctionTemplateDecl of generic lambdas
// now.
TemplateParameterList *TemplateParams =
getGenericLambdaTemplateParameterList(LSI, *this);
if (TemplateParams) {
AddTemplateParametersToLambdaCallOperator(LSI->CallOperator, LSI->Lambda,
TemplateParams);
LSI->Lambda->setLambdaIsGeneric(true);
LSI->ContainsUnexpandedParameterPack |=
TemplateParams->containsUnexpandedParameterPack();
}
LSI->AfterParameterList = true;
}
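// E.g. for a generic lambda sketch such as
//   [](auto x) noexcept(noexcept(x)) -> decltype(x) { return x; }
// the invented template parameter for 'auto x' must already be attached to
// the call operator when the noexcept and trailing return type are parsed,
// since both depend on 'x'.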
void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
Declarator &ParamInfo,
const DeclSpec &DS) {
LambdaScopeInfo *LSI = getCurrentLambdaScopeUnsafe(*this);
LSI->CallOperator->setConstexprKind(DS.getConstexprSpecifier());
SmallVector<ParmVarDecl *, 8> Params;
bool ExplicitResultType;
SourceLocation TypeLoc, CallOperatorLoc;
if (ParamInfo.getNumTypeObjects() == 0) {
CallOperatorLoc = TypeLoc = Intro.Range.getEnd();
} else {
unsigned Index;
ParamInfo.isFunctionDeclarator(Index);
const auto &Object = ParamInfo.getTypeObject(Index);
TypeLoc =
Object.Loc.isValid() ? Object.Loc : ParamInfo.getSourceRange().getEnd();
CallOperatorLoc = ParamInfo.getSourceRange().getEnd();
}
CXXRecordDecl *Class = LSI->Lambda;
CXXMethodDecl *Method = LSI->CallOperator;
TypeSourceInfo *MethodTyInfo = getLambdaType(
*this, Intro, ParamInfo, getCurScope(), TypeLoc, ExplicitResultType);
LSI->ExplicitParams = ParamInfo.getNumTypeObjects() != 0;
if (ParamInfo.isFunctionDeclarator() != 0 &&
!FTIHasSingleVoidParameter(ParamInfo.getFunctionTypeInfo())) {
const auto &FTI = ParamInfo.getFunctionTypeInfo();
Params.reserve(FTI.NumParams);
for (unsigned I = 0; I < FTI.NumParams; ++I) {
auto *Param = cast<ParmVarDecl>(FTI.Params[I].Param);
Param->setScopeInfo(0, Params.size());
Params.push_back(Param);
}
}
bool IsLambdaStatic =
ParamInfo.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static;
CompleteLambdaCallOperator(
Method, Intro.Range.getBegin(), CallOperatorLoc,
ParamInfo.getTrailingRequiresClause(), MethodTyInfo,
ParamInfo.getDeclSpec().getConstexprSpecifier(),
IsLambdaStatic ? SC_Static : SC_None, Params, ExplicitResultType);
CheckCXXDefaultArguments(Method);
// This represents the function body for the lambda function; check if we
// have to apply optnone due to a pragma.
AddRangeBasedOptnone(Method);
// A code_seg attribute on the lambda applies to the method.
if (Attr *A = getImplicitCodeSegOrSectionAttrForFunction(
Method, /*IsDefinition=*/true))
Method->addAttr(A);
// Attributes on the lambda apply to the method.
ProcessDeclAttributes(CurScope, Method, ParamInfo);
// CUDA lambdas get implicit host and device attributes.
if (getLangOpts().CUDA)
CUDA().SetLambdaAttrs(Method);
// OpenMP lambdas might get assumption attributes.
if (LangOpts.OpenMP)
OpenMP().ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(Method);
handleLambdaNumbering(Class, Method);
for (auto &&C : LSI->Captures) {
if (!C.isVariableCapture())
continue;
ValueDecl *Var = C.getVariable();
if (Var && Var->isInitCapture()) {
PushOnScopeChains(Var, CurScope, false);
}
}
auto CheckRedefinition = [&](ParmVarDecl *Param) {
for (const auto &Capture : Intro.Captures) {
if (Capture.Id == Param->getIdentifier()) {
Diag(Param->getLocation(), diag::err_parameter_shadow_capture);
Diag(Capture.Loc, diag::note_var_explicitly_captured_here)
<< Capture.Id << true;
return false;
}
}
return true;
};
for (ParmVarDecl *P : Params) {
if (!P->getIdentifier())
continue;
if (CheckRedefinition(P))
CheckShadow(CurScope, P);
PushOnScopeChains(P, CurScope);
}
// C++23 [expr.prim.lambda.capture]p5:
// If an identifier in a capture appears as the declarator-id of a parameter
// of the lambda-declarator's parameter-declaration-clause or as the name of a
// template parameter of the lambda-expression's template-parameter-list, the
// program is ill-formed.
TemplateParameterList *TemplateParams =
getGenericLambdaTemplateParameterList(LSI, *this);
if (TemplateParams) {
for (const auto *TP : TemplateParams->asArray()) {
if (!TP->getIdentifier())
continue;
for (const auto &Capture : Intro.Captures) {
if (Capture.Id == TP->getIdentifier()) {
Diag(Capture.Loc, diag::err_template_param_shadow) << Capture.Id;
NoteTemplateParameterLocation(*TP);
}
}
}
}
// C++20: dcl.decl.general p4:
// The optional requires-clause ([temp.pre]) in an init-declarator or
// member-declarator shall be present only if the declarator declares a
// templated function ([dcl.fct]).
if (Expr *TRC = Method->getTrailingRequiresClause()) {
// [temp.pre]/8:
// An entity is templated if it is
// - a template,
// - an entity defined ([basic.def]) or created ([class.temporary]) in a
// templated entity,
// - a member of a templated entity,
// - an enumerator for an enumeration that is a templated entity, or
// - the closure type of a lambda-expression ([expr.prim.lambda.closure])
// appearing in the declaration of a templated entity. [Note 6: A local
// class, a local or block variable, or a friend function defined in a
// templated entity is a templated entity. — end note]
//
// A templated function is a function template or a function that is
// templated. A templated class is a class template or a class that is
// templated. A templated variable is a variable template or a variable
// that is templated.
// Note: we only have to check if this is defined in a template entity, OR
// if we are a template, since the rest don't apply. The requires clause
// applies to the call operator, which we already know is a member function,
// AND defined.
if (!Method->getDescribedFunctionTemplate() && !Method->isTemplated()) {
Diag(TRC->getBeginLoc(), diag::err_constrained_non_templated_function);
}
}
// Enter a new evaluation context to insulate the lambda from any
// cleanups from the enclosing full-expression.
PushExpressionEvaluationContext(
LSI->CallOperator->isConsteval()
? ExpressionEvaluationContext::ImmediateFunctionContext
: ExpressionEvaluationContext::PotentiallyEvaluated);
ExprEvalContexts.back().InImmediateFunctionContext =
LSI->CallOperator->isConsteval();
ExprEvalContexts.back().InImmediateEscalatingFunctionContext =
getLangOpts().CPlusPlus20 && LSI->CallOperator->isImmediateEscalating();
}
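// By example, the checks above reject:
//   [x](int x) {};        // error: parameter shadows capture
//   [y]<typename y>() {}; // error: template parameter shadows capture
//   auto f = []() requires true {}; // error when the lambda does not
//                                   // appear in a templated entity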
void Sema::ActOnLambdaError(SourceLocation StartLoc, Scope *CurScope,
bool IsInstantiation) {
LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(FunctionScopes.back());
// Leave the expression-evaluation context.
DiscardCleanupsInEvaluationContext();
PopExpressionEvaluationContext();
// Leave the context of the lambda.
if (!IsInstantiation)
PopDeclContext();
// Finalize the lambda.
CXXRecordDecl *Class = LSI->Lambda;
Class->setInvalidDecl();
SmallVector<Decl*, 4> Fields(Class->fields());
ActOnFields(nullptr, Class->getLocation(), Class, Fields, SourceLocation(),
SourceLocation(), ParsedAttributesView());
CheckCompletedCXXClass(nullptr, Class);
PopFunctionScopeInfo();
}
template <typename Func>
static void repeatForLambdaConversionFunctionCallingConvs(
Sema &S, const FunctionProtoType &CallOpProto, Func F) {
CallingConv DefaultFree = S.Context.getDefaultCallingConvention(
CallOpProto.isVariadic(), /*IsCXXMethod=*/false);
CallingConv DefaultMember = S.Context.getDefaultCallingConvention(
CallOpProto.isVariadic(), /*IsCXXMethod=*/true);
CallingConv CallOpCC = CallOpProto.getCallConv();
/// Implement emitting a version of the operator for many of the calling
/// conventions for MSVC, as described here:
/// https://devblogs.microsoft.com/oldnewthing/20150220-00/?p=44623.
/// Experimentally, we determined that cdecl, stdcall, fastcall, and
/// vectorcall are generated by MSVC when it is supported by the target.
/// Additionally, we are ensuring that the default-free/default-member and
/// call-operator calling convention are generated as well.
/// NOTE: We intentionally generate a 'thiscall' on Win32 implicitly from the
/// 'member default', despite MSVC not doing so. We do this in order to ensure
/// that someone who intentionally places 'thiscall' on the lambda call
/// operator will still get that overload, since we don't have a way of
/// detecting the attribute by the time we get here.
if (S.getLangOpts().MSVCCompat) {
CallingConv Convs[] = {
CC_C, CC_X86StdCall, CC_X86FastCall, CC_X86VectorCall,
DefaultFree, DefaultMember, CallOpCC};
llvm::sort(Convs);
llvm::iterator_range<CallingConv *> Range(
std::begin(Convs), std::unique(std::begin(Convs), std::end(Convs)));
const TargetInfo &TI = S.getASTContext().getTargetInfo();
for (CallingConv C : Range) {
if (TI.checkCallingConvention(C) == TargetInfo::CCCR_OK)
F(C);
}
return;
}
if (CallOpCC == DefaultMember && DefaultMember != DefaultFree) {
F(DefaultFree);
F(DefaultMember);
} else {
F(CallOpCC);
}
}
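// For instance, when targeting MSVC on 32-bit x86, a call operator with the
// default convention gets conversions for roughly cdecl, stdcall, fastcall,
// vectorcall, and the member default (thiscall), each filtered through
// checkCallingConvention() above.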
// Returns the 'standard' calling convention to be used for the lambda
// conversion function, that is, the 'free' function calling convention unless
// it is overridden by a non-default calling convention attribute.
static CallingConv
getLambdaConversionFunctionCallConv(Sema &S,
const FunctionProtoType *CallOpProto) {
CallingConv DefaultFree = S.Context.getDefaultCallingConvention(
CallOpProto->isVariadic(), /*IsCXXMethod=*/false);
CallingConv DefaultMember = S.Context.getDefaultCallingConvention(
CallOpProto->isVariadic(), /*IsCXXMethod=*/true);
CallingConv CallOpCC = CallOpProto->getCallConv();
// If the call operator's convention hasn't been changed from the member
// default, use the 'free' function calling convention.
if (CallOpCC == DefaultMember && DefaultMember != DefaultFree)
return DefaultFree;
return CallOpCC;
}
QualType Sema::getLambdaConversionFunctionResultType(
const FunctionProtoType *CallOpProto, CallingConv CC) {
const FunctionProtoType::ExtProtoInfo CallOpExtInfo =
CallOpProto->getExtProtoInfo();
FunctionProtoType::ExtProtoInfo InvokerExtInfo = CallOpExtInfo;
InvokerExtInfo.ExtInfo = InvokerExtInfo.ExtInfo.withCallingConv(CC);
InvokerExtInfo.TypeQuals = Qualifiers();
assert(InvokerExtInfo.RefQualifier == RQ_None &&
"Lambda's call operator should not have a reference qualifier");
return Context.getFunctionType(CallOpProto->getReturnType(),
CallOpProto->getParamTypes(), InvokerExtInfo);
}
/// Add a lambda's conversion to function pointer, as described in
/// C++11 [expr.prim.lambda]p6.
static void addFunctionPointerConversion(Sema &S, SourceRange IntroducerRange,
CXXRecordDecl *Class,
CXXMethodDecl *CallOperator,
QualType InvokerFunctionTy) {
// This conversion is explicitly disabled if the lambda's function has
// pass_object_size attributes on any of its parameters.
auto HasPassObjectSizeAttr = [](const ParmVarDecl *P) {
return P->hasAttr<PassObjectSizeAttr>();
};
if (llvm::any_of(CallOperator->parameters(), HasPassObjectSizeAttr))
return;
// Add the conversion to function pointer.
QualType PtrToFunctionTy = S.Context.getPointerType(InvokerFunctionTy);
// Create the type of the conversion function.
FunctionProtoType::ExtProtoInfo ConvExtInfo(
S.Context.getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true));
// The conversion function is always const and noexcept.
ConvExtInfo.TypeQuals = Qualifiers();
ConvExtInfo.TypeQuals.addConst();
ConvExtInfo.ExceptionSpec.Type = EST_BasicNoexcept;
QualType ConvTy =
S.Context.getFunctionType(PtrToFunctionTy, std::nullopt, ConvExtInfo);
SourceLocation Loc = IntroducerRange.getBegin();
DeclarationName ConversionName
= S.Context.DeclarationNames.getCXXConversionFunctionName(
S.Context.getCanonicalType(PtrToFunctionTy));
// Construct a TypeSourceInfo for the conversion function, and wire
// all the parameters appropriately for the FunctionProtoTypeLoc
// so that everything works during transformation/instantiation of
// generic lambdas.
// The main reason for wiring up the parameters of the conversion
// function with that of the call operator is so that constructs
// like the following work:
// auto L = [](auto b) { <-- 1
// return [](auto a) -> decltype(a) { <-- 2
// return a;
// };
// };
// int (*fp)(int) = L(5);
// Because the trailing return type can contain DeclRefExprs that refer
// to the original call operator's variables, we hijack the call
// operator's ParmVarDecls below.
TypeSourceInfo *ConvNamePtrToFunctionTSI =
S.Context.getTrivialTypeSourceInfo(PtrToFunctionTy, Loc);
DeclarationNameLoc ConvNameLoc =
DeclarationNameLoc::makeNamedTypeLoc(ConvNamePtrToFunctionTSI);
// The conversion function is a conversion to a pointer-to-function.
TypeSourceInfo *ConvTSI = S.Context.getTrivialTypeSourceInfo(ConvTy, Loc);
FunctionProtoTypeLoc ConvTL =
ConvTSI->getTypeLoc().getAs<FunctionProtoTypeLoc>();
// Get the result of the conversion function which is a pointer-to-function.
PointerTypeLoc PtrToFunctionTL =
ConvTL.getReturnLoc().getAs<PointerTypeLoc>();
// Do the same for the TypeSourceInfo that is used to name the conversion
// operator.
PointerTypeLoc ConvNamePtrToFunctionTL =
ConvNamePtrToFunctionTSI->getTypeLoc().getAs<PointerTypeLoc>();
// Get the underlying function types that the conversion function will
// be converting to (should match the type of the call operator).
FunctionProtoTypeLoc CallOpConvTL =
PtrToFunctionTL.getPointeeLoc().getAs<FunctionProtoTypeLoc>();
FunctionProtoTypeLoc CallOpConvNameTL =
ConvNamePtrToFunctionTL.getPointeeLoc().getAs<FunctionProtoTypeLoc>();
// Wire up the FunctionProtoTypeLocs with the call operator's parameters.
// These parameters are essentially used to transform the name and
// the type of the conversion operator. By using the same parameters
// as the call operator's, we don't have to fix any back-references that
// the trailing return type of the call operator uses (such as
// decltype(some_type<decltype(a)>::type{} + decltype(a){}) etc.)
// - we can simply use the return type of the call operator, and
// everything should work.
SmallVector<ParmVarDecl *, 4> InvokerParams;
for (unsigned I = 0, N = CallOperator->getNumParams(); I != N; ++I) {
ParmVarDecl *From = CallOperator->getParamDecl(I);
InvokerParams.push_back(ParmVarDecl::Create(
S.Context,
// Temporarily add to the TU. This is set to the invoker below.
S.Context.getTranslationUnitDecl(), From->getBeginLoc(),
From->getLocation(), From->getIdentifier(), From->getType(),
From->getTypeSourceInfo(), From->getStorageClass(),
/*DefArg=*/nullptr));
CallOpConvTL.setParam(I, From);
CallOpConvNameTL.setParam(I, From);
}
CXXConversionDecl *Conversion = CXXConversionDecl::Create(
S.Context, Class, Loc,
DeclarationNameInfo(ConversionName, Loc, ConvNameLoc), ConvTy, ConvTSI,
S.getCurFPFeatures().isFPConstrained(),
/*isInline=*/true, ExplicitSpecifier(),
S.getLangOpts().CPlusPlus17 ? ConstexprSpecKind::Constexpr
: ConstexprSpecKind::Unspecified,
CallOperator->getBody()->getEndLoc());
Conversion->setAccess(AS_public);
Conversion->setImplicit(true);
// A non-generic lambda may still be a templated entity. We need to preserve
// constraints when converting the lambda to a function pointer. See GH63181.
if (Expr *Requires = CallOperator->getTrailingRequiresClause())
Conversion->setTrailingRequiresClause(Requires);
if (Class->isGenericLambda()) {
// Create a template version of the conversion operator, using the template
// parameter list of the function call operator.
FunctionTemplateDecl *TemplateCallOperator =
CallOperator->getDescribedFunctionTemplate();
FunctionTemplateDecl *ConversionTemplate =
FunctionTemplateDecl::Create(S.Context, Class,
Loc, ConversionName,
TemplateCallOperator->getTemplateParameters(),
Conversion);
ConversionTemplate->setAccess(AS_public);
ConversionTemplate->setImplicit(true);
Conversion->setDescribedFunctionTemplate(ConversionTemplate);
Class->addDecl(ConversionTemplate);
} else
Class->addDecl(Conversion);
// If the lambda is not static, we need to add a static member
// function that will be the result of the conversion with a
// certain unique ID.
// When it is static we just return the static call operator instead.
if (CallOperator->isImplicitObjectMemberFunction()) {
DeclarationName InvokerName =
&S.Context.Idents.get(getLambdaStaticInvokerName());
// FIXME: Instead of passing in the CallOperator->getTypeSourceInfo()
// we should get a prebuilt TrivialTypeSourceInfo from Context
// using FunctionTy & Loc and get its TypeLoc as a FunctionProtoTypeLoc
// then rewire the parameters accordingly, by hoisting up the InvokeParams
// loop below and then use its Params to set Invoke->setParams(...) below.
// This would prevent the 'const' qualifier of the call operator from
// contaminating the type of the invoker, which is currently adjusted
// in SemaTemplateDeduction.cpp:DeduceTemplateArguments. Fixing the
// trailing return type of the invoker would require a visitor to rebuild
// the trailing return type and adjusting all back DeclRefExpr's to refer
// to the new static invoker parameters - not the call operator's.
CXXMethodDecl *Invoke = CXXMethodDecl::Create(
S.Context, Class, Loc, DeclarationNameInfo(InvokerName, Loc),
InvokerFunctionTy, CallOperator->getTypeSourceInfo(), SC_Static,
S.getCurFPFeatures().isFPConstrained(),
/*isInline=*/true, CallOperator->getConstexprKind(),
CallOperator->getBody()->getEndLoc());
for (unsigned I = 0, N = CallOperator->getNumParams(); I != N; ++I)
InvokerParams[I]->setOwningFunction(Invoke);
Invoke->setParams(InvokerParams);
Invoke->setAccess(AS_private);
Invoke->setImplicit(true);
if (Class->isGenericLambda()) {
FunctionTemplateDecl *TemplateCallOperator =
CallOperator->getDescribedFunctionTemplate();
FunctionTemplateDecl *StaticInvokerTemplate =
FunctionTemplateDecl::Create(
S.Context, Class, Loc, InvokerName,
TemplateCallOperator->getTemplateParameters(), Invoke);
StaticInvokerTemplate->setAccess(AS_private);
StaticInvokerTemplate->setImplicit(true);
Invoke->setDescribedFunctionTemplate(StaticInvokerTemplate);
Class->addDecl(StaticInvokerTemplate);
} else
Class->addDecl(Invoke);
}
}
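// The conversion built here is what makes the usual captureless-lambda
// idioms work, e.g.:
//   int (*fp)(int) = [](int a) { return a; };
//   auto *gp = +[](int a) { return a; }; // unary '+' selects the
//                                        // pointer conversion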
/// Add a lambda's conversion to function pointers, as described in
/// C++11 [expr.prim.lambda]p6. Note that in most cases, this should emit only a
/// single pointer conversion. In the event that the default calling convention
/// for free and member functions is different, it will emit both conventions.
static void addFunctionPointerConversions(Sema &S, SourceRange IntroducerRange,
CXXRecordDecl *Class,
CXXMethodDecl *CallOperator) {
const FunctionProtoType *CallOpProto =
CallOperator->getType()->castAs<FunctionProtoType>();
repeatForLambdaConversionFunctionCallingConvs(
S, *CallOpProto, [&](CallingConv CC) {
QualType InvokerFunctionTy =
S.getLambdaConversionFunctionResultType(CallOpProto, CC);
addFunctionPointerConversion(S, IntroducerRange, Class, CallOperator,
InvokerFunctionTy);
});
}
/// Add a lambda's conversion to block pointer.
static void addBlockPointerConversion(Sema &S,
SourceRange IntroducerRange,
CXXRecordDecl *Class,
CXXMethodDecl *CallOperator) {
const FunctionProtoType *CallOpProto =
CallOperator->getType()->castAs<FunctionProtoType>();
QualType FunctionTy = S.getLambdaConversionFunctionResultType(
CallOpProto, getLambdaConversionFunctionCallConv(S, CallOpProto));
QualType BlockPtrTy = S.Context.getBlockPointerType(FunctionTy);
FunctionProtoType::ExtProtoInfo ConversionEPI(
S.Context.getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true));
ConversionEPI.TypeQuals = Qualifiers();
ConversionEPI.TypeQuals.addConst();
QualType ConvTy =
S.Context.getFunctionType(BlockPtrTy, std::nullopt, ConversionEPI);
SourceLocation Loc = IntroducerRange.getBegin();
DeclarationName Name
= S.Context.DeclarationNames.getCXXConversionFunctionName(
S.Context.getCanonicalType(BlockPtrTy));
DeclarationNameLoc NameLoc = DeclarationNameLoc::makeNamedTypeLoc(
S.Context.getTrivialTypeSourceInfo(BlockPtrTy, Loc));
CXXConversionDecl *Conversion = CXXConversionDecl::Create(
S.Context, Class, Loc, DeclarationNameInfo(Name, Loc, NameLoc), ConvTy,
S.Context.getTrivialTypeSourceInfo(ConvTy, Loc),
S.getCurFPFeatures().isFPConstrained(),
/*isInline=*/true, ExplicitSpecifier(), ConstexprSpecKind::Unspecified,
CallOperator->getBody()->getEndLoc());
Conversion->setAccess(AS_public);
Conversion->setImplicit(true);
Class->addDecl(Conversion);
}
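// E.g. in Objective-C++ with blocks enabled, a non-generic lambda gains:
//   void (^blk)(int) = [](int x) { /* ... */ };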
ExprResult Sema::BuildCaptureInit(const Capture &Cap,
SourceLocation ImplicitCaptureLoc,
bool IsOpenMPMapping) {
// VLA captures don't have a stored initialization expression.
if (Cap.isVLATypeCapture())
return ExprResult();
// An init-capture is initialized directly from its stored initializer.
if (Cap.isInitCapture())
return cast<VarDecl>(Cap.getVariable())->getInit();
// For anything else, build an initialization expression. For an implicit
// capture, the capture notionally happens at the capture-default, so use
// that location here.
SourceLocation Loc =
ImplicitCaptureLoc.isValid() ? ImplicitCaptureLoc : Cap.getLocation();
// C++11 [expr.prim.lambda]p21:
// When the lambda-expression is evaluated, the entities that
// are captured by copy are used to direct-initialize each
// corresponding non-static data member of the resulting closure
// object. (For array members, the array elements are
// direct-initialized in increasing subscript order.) These
// initializations are performed in the (unspecified) order in
// which the non-static data members are declared.
// C++ [expr.prim.lambda]p12:
// An entity captured by a lambda-expression is odr-used (3.2) in
// the scope containing the lambda-expression.
ExprResult Init;
IdentifierInfo *Name = nullptr;
if (Cap.isThisCapture()) {
QualType ThisTy = getCurrentThisType();
Expr *This = BuildCXXThisExpr(Loc, ThisTy, ImplicitCaptureLoc.isValid());
if (Cap.isCopyCapture())
Init = CreateBuiltinUnaryOp(Loc, UO_Deref, This);
else
Init = This;
} else {
assert(Cap.isVariableCapture() && "unknown kind of capture");
ValueDecl *Var = Cap.getVariable();
Name = Var->getIdentifier();
Init = BuildDeclarationNameExpr(
CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
}
// In OpenMP, the capture kind doesn't actually describe how to capture:
// variables are "mapped" onto the device in a process that does not formally
// make a copy, even for a "copy capture".
if (IsOpenMPMapping)
return Init;
if (Init.isInvalid())
return ExprError();
Expr *InitExpr = Init.get();
InitializedEntity Entity = InitializedEntity::InitializeLambdaCapture(
Name, Cap.getCaptureType(), Loc);
InitializationKind InitKind =
InitializationKind::CreateDirect(Loc, Loc, Loc);
InitializationSequence InitSeq(*this, Entity, InitKind, InitExpr);
return InitSeq.Perform(*this, Entity, InitKind, InitExpr);
}
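// E.g. for '[s] { ... }' where 's' is a std::string, the initializer built
// here is a DeclRefExpr naming 's' that direct-initializes the closure's
// corresponding member (a copy construction).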
ExprResult Sema::ActOnLambdaExpr(SourceLocation StartLoc, Stmt *Body) {
LambdaScopeInfo LSI = *cast<LambdaScopeInfo>(FunctionScopes.back());
ActOnFinishFunctionBody(LSI.CallOperator, Body);
return BuildLambdaExpr(StartLoc, Body->getEndLoc(), &LSI);
}
static LambdaCaptureDefault
mapImplicitCaptureStyle(CapturingScopeInfo::ImplicitCaptureStyle ICS) {
switch (ICS) {
case CapturingScopeInfo::ImpCap_None:
return LCD_None;
case CapturingScopeInfo::ImpCap_LambdaByval:
return LCD_ByCopy;
case CapturingScopeInfo::ImpCap_CapturedRegion:
case CapturingScopeInfo::ImpCap_LambdaByref:
return LCD_ByRef;
case CapturingScopeInfo::ImpCap_Block:
llvm_unreachable("block capture in lambda");
}
llvm_unreachable("Unknown implicit capture style");
}
bool Sema::CaptureHasSideEffects(const Capture &From) {
if (From.isInitCapture()) {
Expr *Init = cast<VarDecl>(From.getVariable())->getInit();
if (Init && Init->HasSideEffects(Context))
return true;
}
if (!From.isCopyCapture())
return false;
const QualType T = From.isThisCapture()
? getCurrentThisType()->getPointeeType()
: From.getCaptureType();
if (T.isVolatileQualified())
return true;
const Type *BaseT = T->getBaseElementTypeUnsafe();
if (const CXXRecordDecl *RD = BaseT->getAsCXXRecordDecl())
return !RD->isCompleteDefinition() || !RD->hasTrivialCopyConstructor() ||
!RD->hasTrivialDestructor();
return false;
}
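// E.g. '[x = acquire()] {}' (with a hypothetical side-effecting acquire()):
// removing the capture would drop the acquire() call, so
// DiagnoseUnusedLambdaCapture below stays silent; the same applies to
// copies of volatile or non-trivially-copyable types.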
bool Sema::DiagnoseUnusedLambdaCapture(SourceRange CaptureRange,
const Capture &From) {
if (CaptureHasSideEffects(From))
return false;
if (From.isVLATypeCapture())
return false;
// FIXME: maybe we should warn on these if we can find a sensible diagnostic
// message
if (From.isInitCapture() &&
From.getVariable()->isPlaceholderVar(getLangOpts()))
return false;
auto diag = Diag(From.getLocation(), diag::warn_unused_lambda_capture);
if (From.isThisCapture())
diag << "'this'";
else
diag << From.getVariable();
diag << From.isNonODRUsed();
diag << FixItHint::CreateRemoval(CaptureRange);
return true;
}
/// Create a field within the lambda class or captured statement record for the
/// given capture.
FieldDecl *Sema::BuildCaptureField(RecordDecl *RD,
const sema::Capture &Capture) {
SourceLocation Loc = Capture.getLocation();
QualType FieldType = Capture.getCaptureType();
TypeSourceInfo *TSI = nullptr;
if (Capture.isVariableCapture()) {
const auto *Var = dyn_cast_or_null<VarDecl>(Capture.getVariable());
if (Var && Var->isInitCapture())
TSI = Var->getTypeSourceInfo();
}
// FIXME: Should we really be doing this? A null TypeSourceInfo seems more
// appropriate, at least for an implicit capture.
if (!TSI)
TSI = Context.getTrivialTypeSourceInfo(FieldType, Loc);
// Build the non-static data member.
FieldDecl *Field =
FieldDecl::Create(Context, RD, /*StartLoc=*/Loc, /*IdLoc=*/Loc,
/*Id=*/nullptr, FieldType, TSI, /*BW=*/nullptr,
/*Mutable=*/false, ICIS_NoInit);
// If the variable being captured has an invalid type, mark the class as
// invalid as well.
if (!FieldType->isDependentType()) {
if (RequireCompleteSizedType(Loc, FieldType,
diag::err_field_incomplete_or_sizeless)) {
RD->setInvalidDecl();
Field->setInvalidDecl();
} else {
NamedDecl *Def;
FieldType->isIncompleteType(&Def);
if (Def && Def->isInvalidDecl()) {
RD->setInvalidDecl();
Field->setInvalidDecl();
}
}
}
Field->setImplicit(true);
Field->setAccess(AS_private);
RD->addDecl(Field);
if (Capture.isVLATypeCapture())
Field->setCapturedVLAType(Capture.getCapturedVLAType());
return Field;
}
ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
LambdaScopeInfo *LSI) {
// Collect information from the lambda scope.
SmallVector<LambdaCapture, 4> Captures;
SmallVector<Expr *, 4> CaptureInits;
SourceLocation CaptureDefaultLoc = LSI->CaptureDefaultLoc;
LambdaCaptureDefault CaptureDefault =
mapImplicitCaptureStyle(LSI->ImpCaptureStyle);
CXXRecordDecl *Class;
CXXMethodDecl *CallOperator;
SourceRange IntroducerRange;
bool ExplicitParams;
bool ExplicitResultType;
CleanupInfo LambdaCleanup;
bool ContainsUnexpandedParameterPack;
bool IsGenericLambda;
{
CallOperator = LSI->CallOperator;
Class = LSI->Lambda;
IntroducerRange = LSI->IntroducerRange;
ExplicitParams = LSI->ExplicitParams;
ExplicitResultType = !LSI->HasImplicitReturnType;
LambdaCleanup = LSI->Cleanup;
ContainsUnexpandedParameterPack = LSI->ContainsUnexpandedParameterPack;
IsGenericLambda = Class->isGenericLambda();
CallOperator->setLexicalDeclContext(Class);
Decl *TemplateOrNonTemplateCallOperatorDecl =
CallOperator->getDescribedFunctionTemplate()
? CallOperator->getDescribedFunctionTemplate()
: cast<Decl>(CallOperator);
// FIXME: Is this really the best choice? Keeping the lexical decl context
// set as CurContext seems more faithful to the source.
TemplateOrNonTemplateCallOperatorDecl->setLexicalDeclContext(Class);
PopExpressionEvaluationContext();
// True if the current capture has a used capture or default before it.
bool CurHasPreviousCapture = CaptureDefault != LCD_None;
SourceLocation PrevCaptureLoc = CurHasPreviousCapture ?
CaptureDefaultLoc : IntroducerRange.getBegin();
for (unsigned I = 0, N = LSI->Captures.size(); I != N; ++I) {
const Capture &From = LSI->Captures[I];
if (From.isInvalid())
return ExprError();
assert(!From.isBlockCapture() && "Cannot capture __block variables");
bool IsImplicit = I >= LSI->NumExplicitCaptures;
SourceLocation ImplicitCaptureLoc =
IsImplicit ? CaptureDefaultLoc : SourceLocation();
// Use source ranges of explicit captures for fixits where available.
SourceRange CaptureRange = LSI->ExplicitCaptureRanges[I];
// Warn about unused explicit captures.
bool IsCaptureUsed = true;
if (!CurContext->isDependentContext() && !IsImplicit &&
!From.isODRUsed()) {
// Initialized captures that are non-ODR used may not be eliminated.
// FIXME: Where did the IsGenericLambda here come from?
bool NonODRUsedInitCapture =
IsGenericLambda && From.isNonODRUsed() && From.isInitCapture();
if (!NonODRUsedInitCapture) {
bool IsLast = (I + 1) == LSI->NumExplicitCaptures;
SourceRange FixItRange;
if (CaptureRange.isValid()) {
if (!CurHasPreviousCapture && !IsLast) {
// If there are no captures preceding this capture, remove the
// following comma.
FixItRange = SourceRange(CaptureRange.getBegin(),
getLocForEndOfToken(CaptureRange.getEnd()));
} else {
// Otherwise, remove the comma since the last used capture.
FixItRange = SourceRange(getLocForEndOfToken(PrevCaptureLoc),
CaptureRange.getEnd());
}
}
IsCaptureUsed = !DiagnoseUnusedLambdaCapture(FixItRange, From);
}
}
if (CaptureRange.isValid()) {
CurHasPreviousCapture |= IsCaptureUsed;
PrevCaptureLoc = CaptureRange.getEnd();
}
// Map the capture to our AST representation.
LambdaCapture Capture = [&] {
if (From.isThisCapture()) {
// Capturing 'this' implicitly with a default of '[=]' is deprecated,
// because it results in a reference capture. Don't warn prior to
// C++2a; there's nothing that can be done about it before then.
if (getLangOpts().CPlusPlus20 && IsImplicit &&
CaptureDefault == LCD_ByCopy) {
Diag(From.getLocation(), diag::warn_deprecated_this_capture);
Diag(CaptureDefaultLoc, diag::note_deprecated_this_capture)
<< FixItHint::CreateInsertion(
getLocForEndOfToken(CaptureDefaultLoc), ", this");
}
return LambdaCapture(From.getLocation(), IsImplicit,
From.isCopyCapture() ? LCK_StarThis : LCK_This);
} else if (From.isVLATypeCapture()) {
return LambdaCapture(From.getLocation(), IsImplicit, LCK_VLAType);
} else {
assert(From.isVariableCapture() && "unknown kind of capture");
ValueDecl *Var = From.getVariable();
LambdaCaptureKind Kind =
From.isCopyCapture() ? LCK_ByCopy : LCK_ByRef;
return LambdaCapture(From.getLocation(), IsImplicit, Kind, Var,
From.getEllipsisLoc());
}
}();
// Form the initializer for the capture field.
ExprResult Init = BuildCaptureInit(From, ImplicitCaptureLoc);
// FIXME: Skip this capture if the capture is not used, the initializer
// has no side-effects, the type of the capture is trivial, and the
// lambda is not externally visible.
// Add a FieldDecl for the capture and form its initializer.
BuildCaptureField(Class, From);
Captures.push_back(Capture);
CaptureInits.push_back(Init.get());
if (LangOpts.CUDA)
CUDA().CheckLambdaCapture(CallOperator, From);
}
Class->setCaptures(Context, Captures);
// C++11 [expr.prim.lambda]p6:
// The closure type for a lambda-expression with no lambda-capture
// has a public non-virtual non-explicit const conversion function
// to pointer to function having the same parameter and return
// types as the closure type's function call operator.
if (Captures.empty() && CaptureDefault == LCD_None)
addFunctionPointerConversions(*this, IntroducerRange, Class,
CallOperator);
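// Illustrative example (not part of the original source) of the conversion
// added above:
//   int (*fp)(int) = [](int x) { return x + 1; }; // OK: captureless lambda
//   int y = 1;
//   int (*gp)(int) = [y](int x) { return x + y; }; // error: has a capture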
// Objective-C++:
// The closure type for a lambda-expression has a public non-virtual
// non-explicit const conversion function to a block pointer having the
// same parameter and return types as the closure type's function call
// operator.
// FIXME: Fix generic lambda to block conversions.
if (getLangOpts().Blocks && getLangOpts().ObjC && !IsGenericLambda)
addBlockPointerConversion(*this, IntroducerRange, Class, CallOperator);
// Finalize the lambda class.
SmallVector<Decl*, 4> Fields(Class->fields());
ActOnFields(nullptr, Class->getLocation(), Class, Fields, SourceLocation(),
SourceLocation(), ParsedAttributesView());
CheckCompletedCXXClass(nullptr, Class);
}
Cleanup.mergeFrom(LambdaCleanup);
LambdaExpr *Lambda = LambdaExpr::Create(Context, Class, IntroducerRange,
CaptureDefault, CaptureDefaultLoc,
ExplicitParams, ExplicitResultType,
CaptureInits, EndLoc,
ContainsUnexpandedParameterPack);
// If the lambda expression's call operator is not explicitly marked constexpr
// and we are not in a dependent context, analyze the call operator to infer
// its constexpr-ness, suppressing diagnostics while doing so.
if (getLangOpts().CPlusPlus17 && !CallOperator->isInvalidDecl() &&
!CallOperator->isConstexpr() &&
!isa<CoroutineBodyStmt>(CallOperator->getBody()) &&
!Class->getDeclContext()->isDependentContext()) {
CallOperator->setConstexprKind(
CheckConstexprFunctionDefinition(CallOperator,
CheckConstexprKind::CheckValid)
? ConstexprSpecKind::Constexpr
: ConstexprSpecKind::Unspecified);
}
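// Illustrative example (not part of the original source) of the inference:
//   constexpr auto sq = [](int n) { return n * n; };
//   static_assert(sq(3) == 9); // OK in C++17: call operator is constexpr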
// Emit delayed shadowing warnings now that the full capture list is known.
DiagnoseShadowingLambdaDecls(LSI);
if (!CurContext->isDependentContext()) {
switch (ExprEvalContexts.back().Context) {
// C++11 [expr.prim.lambda]p2:
// A lambda-expression shall not appear in an unevaluated operand
// (Clause 5).
case ExpressionEvaluationContext::Unevaluated:
case ExpressionEvaluationContext::UnevaluatedList:
case ExpressionEvaluationContext::UnevaluatedAbstract:
// C++1y [expr.const]p2:
// A conditional-expression e is a core constant expression unless the
// evaluation of e, following the rules of the abstract machine, would
// evaluate [...] a lambda-expression.
//
// This is technically incorrect; there are some constant-evaluated contexts
// where this should be allowed. We should probably fix this when DR1607 is
// ratified, as it lays out the exact set of conditions where we shouldn't
// allow a lambda-expression.
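// For example (illustrative, not part of the original source):
//   using T = decltype([] { return 0; }()); // ill-formed before C++20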
case ExpressionEvaluationContext::ConstantEvaluated:
case ExpressionEvaluationContext::ImmediateFunctionContext:
// We don't actually diagnose this case immediately, because we
// could be within a context where we might find out later that
// the expression is potentially evaluated (e.g., for typeid).
ExprEvalContexts.back().Lambdas.push_back(Lambda);
break;
case ExpressionEvaluationContext::DiscardedStatement:
case ExpressionEvaluationContext::PotentiallyEvaluated:
case ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
break;
}
}
return MaybeBindToTemporary(Lambda);
}
ExprResult Sema::BuildBlockForLambdaConversion(SourceLocation CurrentLocation,
SourceLocation ConvLocation,
CXXConversionDecl *Conv,
Expr *Src) {
// Make sure that the lambda call operator is marked used.
CXXRecordDecl *Lambda = Conv->getParent();
CXXMethodDecl *CallOperator
= cast<CXXMethodDecl>(
Lambda->lookup(
Context.DeclarationNames.getCXXOperatorName(OO_Call)).front());
CallOperator->setReferenced();
CallOperator->markUsed(Context);
ExprResult Init = PerformCopyInitialization(
InitializedEntity::InitializeLambdaToBlock(ConvLocation, Src->getType()),
CurrentLocation, Src);
if (!Init.isInvalid())
Init = ActOnFinishFullExpr(Init.get(), /*DiscardedValue*/ false);
if (Init.isInvalid())
return ExprError();
// Create the new block to be returned.
BlockDecl *Block = BlockDecl::Create(Context, CurContext, ConvLocation);
// Set the type information.
Block->setSignatureAsWritten(CallOperator->getTypeSourceInfo());
Block->setIsVariadic(CallOperator->isVariadic());
Block->setBlockMissingReturnType(false);
// Add parameters.
SmallVector<ParmVarDecl *, 4> BlockParams;
for (unsigned I = 0, N = CallOperator->getNumParams(); I != N; ++I) {
ParmVarDecl *From = CallOperator->getParamDecl(I);
BlockParams.push_back(ParmVarDecl::Create(
Context, Block, From->getBeginLoc(), From->getLocation(),
From->getIdentifier(), From->getType(), From->getTypeSourceInfo(),
From->getStorageClass(),
/*DefArg=*/nullptr));
}
Block->setParams(BlockParams);
Block->setIsConversionFromLambda(true);
// Add capture. The capture uses a fake variable, which doesn't correspond
// to any actual memory location. However, the initializer copy-initializes
// the lambda object.
TypeSourceInfo *CapVarTSI =
Context.getTrivialTypeSourceInfo(Src->getType());
VarDecl *CapVar = VarDecl::Create(Context, Block, ConvLocation,
ConvLocation, nullptr,
Src->getType(), CapVarTSI,
SC_None);
BlockDecl::Capture Capture(/*variable=*/CapVar, /*byRef=*/false,
/*nested=*/false, /*copy=*/Init.get());
Block->setCaptures(Context, Capture, /*CapturesCXXThis=*/false);
// Add a fake function body to the block. IR generation is responsible
// for filling in the actual body, which cannot be expressed as an AST.
Block->setBody(new (Context) CompoundStmt(ConvLocation));
// Create the block literal expression.
Expr *BuildBlock = new (Context) BlockExpr(Block, Conv->getConversionType());
ExprCleanupObjects.push_back(Block);
Cleanup.setExprNeedsCleanups(true);
return BuildBlock;
}
static FunctionDecl *getPatternFunctionDecl(FunctionDecl *FD) {
if (FD->getTemplatedKind() == FunctionDecl::TK_MemberSpecialization) {
while (FD->getInstantiatedFromMemberFunction())
FD = FD->getInstantiatedFromMemberFunction();
return FD;
}
if (FD->getTemplatedKind() == FunctionDecl::TK_DependentNonTemplate)
return FD->getInstantiatedFromDecl();
FunctionTemplateDecl *FTD = FD->getPrimaryTemplate();
if (!FTD)
return nullptr;
while (FTD->getInstantiatedFromMemberTemplate())
FTD = FTD->getInstantiatedFromMemberTemplate();
return FTD->getTemplatedDecl();
}
Sema::LambdaScopeForCallOperatorInstantiationRAII::
LambdaScopeForCallOperatorInstantiationRAII(
Sema &SemaRef, FunctionDecl *FD, MultiLevelTemplateArgumentList MLTAL,
LocalInstantiationScope &Scope, bool ShouldAddDeclsFromParentScope)
: FunctionScopeRAII(SemaRef) {
if (!isLambdaCallOperator(FD)) {
FunctionScopeRAII::disable();
return;
}
SemaRef.RebuildLambdaScopeInfo(cast<CXXMethodDecl>(FD));
FunctionDecl *FDPattern = getPatternFunctionDecl(FD);
if (!FDPattern)
return;
SemaRef.addInstantiatedCapturesToScope(FD, FDPattern, Scope, MLTAL);
if (!ShouldAddDeclsFromParentScope)
return;
llvm::SmallVector<std::pair<FunctionDecl *, FunctionDecl *>, 4>
ParentInstantiations;
while (true) {
FDPattern =
dyn_cast<FunctionDecl>(getLambdaAwareParentOfDeclContext(FDPattern));
FD = dyn_cast<FunctionDecl>(getLambdaAwareParentOfDeclContext(FD));
if (!FDPattern || !FD)
break;
ParentInstantiations.emplace_back(FDPattern, FD);
}
// Add instantiated parameters and local vars to scopes, starting from the
// outermost lambda to the innermost lambda. This ordering ensures that
// parameters in inner lambdas can correctly depend on those defined
// in outer lambdas, e.g. auto L = [](auto... x) {
// return [](decltype(x)... y) { }; // `y` depends on `x`
// };
for (const auto &[FDPattern, FD] : llvm::reverse(ParentInstantiations)) {
SemaRef.addInstantiatedParametersToScope(FD, FDPattern, Scope, MLTAL);
SemaRef.addInstantiatedLocalVarsToScope(FD, FDPattern, Scope);
}
}
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaLookup.cpp b/contrib/llvm-project/clang/lib/Sema/SemaLookup.cpp
index 7a6a64529f52..d3d4bf27ae72 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaLookup.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaLookup.cpp
@@ -1,5764 +1,5764 @@
//===--------------------- SemaLookup.cpp - Name Lookup ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements name lookup for C, C++, Objective-C, and
// Objective-C++.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclLookups.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/RISCVIntrinsicManager.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/SemaRISCV.h"
#include "clang/Sema/TemplateDeduction.h"
#include "clang/Sema/TypoCorrection.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/edit_distance.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <iterator>
#include <list>
#include <optional>
#include <set>
#include <utility>
#include <vector>
#include "OpenCLBuiltins.inc"
using namespace clang;
using namespace sema;
namespace {
class UnqualUsingEntry {
const DeclContext *Nominated;
const DeclContext *CommonAncestor;
public:
UnqualUsingEntry(const DeclContext *Nominated,
const DeclContext *CommonAncestor)
: Nominated(Nominated), CommonAncestor(CommonAncestor) {
}
const DeclContext *getCommonAncestor() const {
return CommonAncestor;
}
const DeclContext *getNominatedNamespace() const {
return Nominated;
}
// Sort by the pointer value of the common ancestor.
struct Comparator {
bool operator()(const UnqualUsingEntry &L, const UnqualUsingEntry &R) {
return L.getCommonAncestor() < R.getCommonAncestor();
}
bool operator()(const UnqualUsingEntry &E, const DeclContext *DC) {
return E.getCommonAncestor() < DC;
}
bool operator()(const DeclContext *DC, const UnqualUsingEntry &E) {
return DC < E.getCommonAncestor();
}
};
};
/// A collection of using directives, as used by C++ unqualified
/// lookup.
class UnqualUsingDirectiveSet {
Sema &SemaRef;
typedef SmallVector<UnqualUsingEntry, 8> ListTy;
ListTy list;
llvm::SmallPtrSet<DeclContext*, 8> visited;
public:
UnqualUsingDirectiveSet(Sema &SemaRef) : SemaRef(SemaRef) {}
void visitScopeChain(Scope *S, Scope *InnermostFileScope) {
// C++ [namespace.udir]p1:
// During unqualified name lookup, the names appear as if they
// were declared in the nearest enclosing namespace which contains
// both the using-directive and the nominated namespace.
DeclContext *InnermostFileDC = InnermostFileScope->getEntity();
assert(InnermostFileDC && InnermostFileDC->isFileContext());
for (; S; S = S->getParent()) {
// C++ [namespace.udir]p1:
// A using-directive shall not appear in class scope, but may
// appear in namespace scope or in block scope.
DeclContext *Ctx = S->getEntity();
if (Ctx && Ctx->isFileContext()) {
visit(Ctx, Ctx);
} else if (!Ctx || Ctx->isFunctionOrMethod()) {
for (auto *I : S->using_directives())
if (SemaRef.isVisible(I))
visit(I, InnermostFileDC);
}
}
}
// Visits a context and collects all of its using directives
// recursively. Treats all using directives as if they were
// declared in the context.
//
// A given context is only ever visited once, so it is important
// that contexts be visited from the inside out in order to get
// the effective DCs right.
void visit(DeclContext *DC, DeclContext *EffectiveDC) {
if (!visited.insert(DC).second)
return;
addUsingDirectives(DC, EffectiveDC);
}
// Visits a using directive and collects all of its using
// directives recursively. Treats all using directives as if they
// were declared in the effective DC.
void visit(UsingDirectiveDecl *UD, DeclContext *EffectiveDC) {
DeclContext *NS = UD->getNominatedNamespace();
if (!visited.insert(NS).second)
return;
addUsingDirective(UD, EffectiveDC);
addUsingDirectives(NS, EffectiveDC);
}
// Adds all the using directives in a context (and those nominated
// by its using directives, transitively) as if they appeared in
// the given effective context.
void addUsingDirectives(DeclContext *DC, DeclContext *EffectiveDC) {
SmallVector<DeclContext*, 4> queue;
while (true) {
for (auto *UD : DC->using_directives()) {
DeclContext *NS = UD->getNominatedNamespace();
if (SemaRef.isVisible(UD) && visited.insert(NS).second) {
addUsingDirective(UD, EffectiveDC);
queue.push_back(NS);
}
}
if (queue.empty())
return;
DC = queue.pop_back_val();
}
}
// Add a using directive as if it had been declared in the given
// context. This helps implement C++ [namespace.udir]p3:
// The using-directive is transitive: if a scope contains a
// using-directive that nominates a second namespace that itself
// contains using-directives, the effect is as if the
// using-directives from the second namespace also appeared in
// the first.
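// Illustrative example (not part of the original source):
//   namespace A { namespace B { int x; } using namespace B; }
//   using namespace A; // as if 'using namespace A::B;' also appeared here
//   int y = x; // OK: finds A::B::x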
void addUsingDirective(UsingDirectiveDecl *UD, DeclContext *EffectiveDC) {
// Find the common ancestor between the effective context and
// the nominated namespace.
DeclContext *Common = UD->getNominatedNamespace();
while (!Common->Encloses(EffectiveDC))
Common = Common->getParent();
Common = Common->getPrimaryContext();
list.push_back(UnqualUsingEntry(UD->getNominatedNamespace(), Common));
}
void done() { llvm::sort(list, UnqualUsingEntry::Comparator()); }
typedef ListTy::const_iterator const_iterator;
const_iterator begin() const { return list.begin(); }
const_iterator end() const { return list.end(); }
llvm::iterator_range<const_iterator>
getNamespacesFor(const DeclContext *DC) const {
return llvm::make_range(std::equal_range(begin(), end(),
DC->getPrimaryContext(),
UnqualUsingEntry::Comparator()));
}
};
} // end anonymous namespace
// Retrieve the set of identifier namespaces that correspond to a
// specific kind of name lookup.
static inline unsigned getIDNS(Sema::LookupNameKind NameKind,
bool CPlusPlus,
bool Redeclaration) {
unsigned IDNS = 0;
switch (NameKind) {
case Sema::LookupObjCImplicitSelfParam:
case Sema::LookupOrdinaryName:
case Sema::LookupRedeclarationWithLinkage:
case Sema::LookupLocalFriendName:
case Sema::LookupDestructorName:
IDNS = Decl::IDNS_Ordinary;
if (CPlusPlus) {
IDNS |= Decl::IDNS_Tag | Decl::IDNS_Member | Decl::IDNS_Namespace;
if (Redeclaration)
IDNS |= Decl::IDNS_TagFriend | Decl::IDNS_OrdinaryFriend;
}
if (Redeclaration)
IDNS |= Decl::IDNS_LocalExtern;
break;
case Sema::LookupOperatorName:
// Operator lookup is its own crazy thing; it is not the same
// as (e.g.) looking up an operator name for redeclaration.
assert(!Redeclaration && "cannot do redeclaration operator lookup");
IDNS = Decl::IDNS_NonMemberOperator;
break;
case Sema::LookupTagName:
if (CPlusPlus) {
IDNS = Decl::IDNS_Type;
// When looking for a redeclaration of a tag name, we add:
// 1) TagFriend to find undeclared friend decls
// 2) Namespace because they can't "overload" with tag decls.
// 3) Tag because it includes class templates, which can't
// "overload" with tag decls.
if (Redeclaration)
IDNS |= Decl::IDNS_Tag | Decl::IDNS_TagFriend | Decl::IDNS_Namespace;
} else {
IDNS = Decl::IDNS_Tag;
}
break;
case Sema::LookupLabel:
IDNS = Decl::IDNS_Label;
break;
case Sema::LookupMemberName:
IDNS = Decl::IDNS_Member;
if (CPlusPlus)
IDNS |= Decl::IDNS_Tag | Decl::IDNS_Ordinary;
break;
case Sema::LookupNestedNameSpecifierName:
IDNS = Decl::IDNS_Type | Decl::IDNS_Namespace;
break;
case Sema::LookupNamespaceName:
IDNS = Decl::IDNS_Namespace;
break;
case Sema::LookupUsingDeclName:
assert(Redeclaration && "should only be used for redecl lookup");
IDNS = Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Member |
Decl::IDNS_Using | Decl::IDNS_TagFriend | Decl::IDNS_OrdinaryFriend |
Decl::IDNS_LocalExtern;
break;
case Sema::LookupObjCProtocolName:
IDNS = Decl::IDNS_ObjCProtocol;
break;
case Sema::LookupOMPReductionName:
IDNS = Decl::IDNS_OMPReduction;
break;
case Sema::LookupOMPMapperName:
IDNS = Decl::IDNS_OMPMapper;
break;
case Sema::LookupAnyName:
IDNS = Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Member
| Decl::IDNS_Using | Decl::IDNS_Namespace | Decl::IDNS_ObjCProtocol
| Decl::IDNS_Type;
break;
}
return IDNS;
}
void LookupResult::configure() {
IDNS = getIDNS(LookupKind, getSema().getLangOpts().CPlusPlus,
isForRedeclaration());
// If we're looking for one of the allocation or deallocation
// operators, make sure that the implicitly-declared new and delete
// operators can be found.
switch (NameInfo.getName().getCXXOverloadedOperator()) {
case OO_New:
case OO_Delete:
case OO_Array_New:
case OO_Array_Delete:
getSema().DeclareGlobalNewDelete();
break;
default:
break;
}
// Compiler builtins are always visible, regardless of where they end
// up being declared.
if (IdentifierInfo *Id = NameInfo.getName().getAsIdentifierInfo()) {
if (unsigned BuiltinID = Id->getBuiltinID()) {
if (!getSema().Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
AllowHidden = true;
}
}
}
bool LookupResult::checkDebugAssumptions() const {
// This function is never called by NDEBUG builds.
assert(ResultKind != NotFound || Decls.size() == 0);
assert(ResultKind != Found || Decls.size() == 1);
assert(ResultKind != FoundOverloaded || Decls.size() > 1 ||
(Decls.size() == 1 &&
isa<FunctionTemplateDecl>((*begin())->getUnderlyingDecl())));
assert(ResultKind != FoundUnresolvedValue || checkUnresolved());
assert(ResultKind != Ambiguous || Decls.size() > 1 ||
(Decls.size() == 1 && (Ambiguity == AmbiguousBaseSubobjects ||
Ambiguity == AmbiguousBaseSubobjectTypes)));
assert((Paths != nullptr) == (ResultKind == Ambiguous &&
(Ambiguity == AmbiguousBaseSubobjectTypes ||
Ambiguity == AmbiguousBaseSubobjects)));
return true;
}
// Necessary because CXXBasePaths is not complete in Sema.h
void LookupResult::deletePaths(CXXBasePaths *Paths) {
delete Paths;
}
/// Get a representative context for a declaration such that two declarations
/// will have the same context if they were found within the same scope.
static const DeclContext *getContextForScopeMatching(const Decl *D) {
// For function-local declarations, use that function as the context. This
// doesn't account for scopes within the function; the caller must deal with
// those.
if (const DeclContext *DC = D->getLexicalDeclContext();
DC->isFunctionOrMethod())
return DC;
// Otherwise, look at the semantic context of the declaration. The
// declaration must have been found there.
return D->getDeclContext()->getRedeclContext();
}
/// Determine whether \p D is a better lookup result than \p Existing,
/// given that they declare the same entity.
static bool isPreferredLookupResult(Sema &S, Sema::LookupNameKind Kind,
const NamedDecl *D,
const NamedDecl *Existing) {
// When looking up redeclarations of a using declaration, prefer a using
// shadow declaration over any other declaration of the same entity.
if (Kind == Sema::LookupUsingDeclName && isa<UsingShadowDecl>(D) &&
!isa<UsingShadowDecl>(Existing))
return true;
const auto *DUnderlying = D->getUnderlyingDecl();
const auto *EUnderlying = Existing->getUnderlyingDecl();
// If they have different underlying declarations, prefer a typedef over the
// original type (this happens when two type declarations denote the same
// type), per a generous reading of C++ [dcl.typedef]p3 and p4. The typedef
// might carry additional semantic information, such as an alignment override.
// However, per C++ [dcl.typedef]p5, when looking up a tag name, prefer a tag
// declaration over a typedef. Also prefer a tag over a typedef for
// destructor name lookup because in some contexts we only accept a
// class-name in a destructor declaration.
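// Illustrative example (not part of the original source):
//   struct S {};
//   typedef struct S S; // same type: ordinary lookup prefers the typedef,
//                       // tag/destructor name lookup prefers the tag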
if (DUnderlying->getCanonicalDecl() != EUnderlying->getCanonicalDecl()) {
assert(isa<TypeDecl>(DUnderlying) && isa<TypeDecl>(EUnderlying));
bool HaveTag = isa<TagDecl>(EUnderlying);
bool WantTag =
Kind == Sema::LookupTagName || Kind == Sema::LookupDestructorName;
return HaveTag != WantTag;
}
// Pick the function with more default arguments.
// FIXME: In the presence of ambiguous default arguments, we should keep both,
// so we can diagnose the ambiguity if the default argument is needed.
// See C++ [over.match.best]p3.
if (const auto *DFD = dyn_cast<FunctionDecl>(DUnderlying)) {
const auto *EFD = cast<FunctionDecl>(EUnderlying);
unsigned DMin = DFD->getMinRequiredArguments();
unsigned EMin = EFD->getMinRequiredArguments();
// If D has more default arguments, it is preferred.
if (DMin != EMin)
return DMin < EMin;
// FIXME: When we track visibility for default function arguments, check
// that we pick the declaration with more visible default arguments.
}
// Pick the template with more default template arguments.
if (const auto *DTD = dyn_cast<TemplateDecl>(DUnderlying)) {
const auto *ETD = cast<TemplateDecl>(EUnderlying);
unsigned DMin = DTD->getTemplateParameters()->getMinRequiredArguments();
unsigned EMin = ETD->getTemplateParameters()->getMinRequiredArguments();
// If D has more default arguments, it is preferred. Note that default
// arguments (and their visibility) are monotonically increasing across the
// redeclaration chain, so this is a quick proxy for "is more recent".
if (DMin != EMin)
return DMin < EMin;
// If D has more *visible* default arguments, it is preferred. Note, an
// earlier default argument being visible does not imply that a later
// default argument is visible, so we can't just check the first one.
for (unsigned I = DMin, N = DTD->getTemplateParameters()->size();
I != N; ++I) {
if (!S.hasVisibleDefaultArgument(
ETD->getTemplateParameters()->getParam(I)) &&
S.hasVisibleDefaultArgument(
DTD->getTemplateParameters()->getParam(I)))
return true;
}
}
// VarDecls can have incomplete array types; prefer the one with the more
// complete array type.
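// For example (illustrative, not part of the original source):
//   extern int arr[];   // incomplete array type
//   extern int arr[10]; // complete type; preferred when visible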
if (const auto *DVD = dyn_cast<VarDecl>(DUnderlying)) {
const auto *EVD = cast<VarDecl>(EUnderlying);
if (EVD->getType()->isIncompleteType() &&
!DVD->getType()->isIncompleteType()) {
// Prefer the decl with a more complete type if visible.
return S.isVisible(DVD);
}
return false; // Don't pick the newer decl merely because it is newer.
}
// For most kinds of declaration, it doesn't really matter which one we pick.
if (!isa<FunctionDecl>(DUnderlying) && !isa<VarDecl>(DUnderlying)) {
// If the existing declaration is hidden, prefer the new one. Otherwise,
// keep what we've got.
return !S.isVisible(Existing);
}
// Pick the newer declaration; it might have a more precise type.
for (const Decl *Prev = DUnderlying->getPreviousDecl(); Prev;
Prev = Prev->getPreviousDecl())
if (Prev == EUnderlying)
return true;
return false;
}
/// Determine whether \p D can hide a tag declaration.
static bool canHideTag(const NamedDecl *D) {
// C++ [basic.scope.declarative]p4:
// Given a set of declarations in a single declarative region [...]
// exactly one declaration shall declare a class name or enumeration name
// that is not a typedef name and the other declarations shall all refer to
// the same variable, non-static data member, or enumerator, or all refer
// to functions and function templates; in this case the class name or
// enumeration name is hidden.
// C++ [basic.scope.hiding]p2:
// A class name or enumeration name can be hidden by the name of a
// variable, data member, function, or enumerator declared in the same
// scope.
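// A classic illustration (not part of the original source) is the POSIX
// pattern in which a function hides the tag of the same name:
//   struct stat { /* ... */ };
//   int stat(const char *path, struct stat *buf); // hides the tag name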
// An UnresolvedUsingValueDecl always instantiates to one of these.
D = D->getUnderlyingDecl();
return isa<VarDecl>(D) || isa<EnumConstantDecl>(D) || isa<FunctionDecl>(D) ||
isa<FunctionTemplateDecl>(D) || isa<FieldDecl>(D) ||
isa<UnresolvedUsingValueDecl>(D);
}
/// Resolves the result kind of this lookup.
void LookupResult::resolveKind() {
unsigned N = Decls.size();
// Fast case: no possible ambiguity.
if (N == 0) {
assert(ResultKind == NotFound ||
ResultKind == NotFoundInCurrentInstantiation);
return;
}
// If there's a single decl, we need to examine it to decide what
// kind of lookup this is.
if (N == 1) {
const NamedDecl *D = (*Decls.begin())->getUnderlyingDecl();
if (isa<FunctionTemplateDecl>(D))
ResultKind = FoundOverloaded;
else if (isa<UnresolvedUsingValueDecl>(D))
ResultKind = FoundUnresolvedValue;
return;
}
// Don't do any extra resolution if we've already resolved as ambiguous.
if (ResultKind == Ambiguous) return;
llvm::SmallDenseMap<const NamedDecl *, unsigned, 16> Unique;
llvm::SmallDenseMap<QualType, unsigned, 16> UniqueTypes;
bool Ambiguous = false;
bool ReferenceToPlaceHolderVariable = false;
bool HasTag = false, HasFunction = false;
bool HasFunctionTemplate = false, HasUnresolved = false;
const NamedDecl *HasNonFunction = nullptr;
llvm::SmallVector<const NamedDecl *, 4> EquivalentNonFunctions;
llvm::BitVector RemovedDecls(N);
for (unsigned I = 0; I < N; I++) {
const NamedDecl *D = Decls[I]->getUnderlyingDecl();
D = cast<NamedDecl>(D->getCanonicalDecl());
// Ignore an invalid declaration unless it's the only one left.
// Also ignore HLSLBufferDecls, which do not have name conflicts with
// other Decls.
if ((D->isInvalidDecl() || isa<HLSLBufferDecl>(D)) &&
N - RemovedDecls.count() > 1) {
RemovedDecls.set(I);
continue;
}
// C++ [basic.scope.hiding]p2:
// A class name or enumeration name can be hidden by the name of
// an object, function, or enumerator declared in the same
// scope. If a class or enumeration name and an object, function,
// or enumerator are declared in the same scope (in any order)
// with the same name, the class or enumeration name is hidden
// wherever the object, function, or enumerator name is visible.
if (HideTags && isa<TagDecl>(D)) {
bool Hidden = false;
for (auto *OtherDecl : Decls) {
if (canHideTag(OtherDecl) && !OtherDecl->isInvalidDecl() &&
getContextForScopeMatching(OtherDecl)->Equals(
getContextForScopeMatching(Decls[I]))) {
RemovedDecls.set(I);
Hidden = true;
break;
}
}
if (Hidden)
continue;
}
std::optional<unsigned> ExistingI;
// Redeclarations of types via typedef can occur both within a scope
// and, through using declarations and directives, across scopes. There is
// no ambiguity if they all refer to the same type, so unique based on the
// canonical type.
if (const auto *TD = dyn_cast<TypeDecl>(D)) {
QualType T = getSema().Context.getTypeDeclType(TD);
auto UniqueResult = UniqueTypes.insert(
std::make_pair(getSema().Context.getCanonicalType(T), I));
if (!UniqueResult.second) {
// The type is not unique.
ExistingI = UniqueResult.first->second;
}
}
// For non-type declarations, check for a prior lookup result naming this
// canonical declaration.
- if (!D->isPlaceholderVar(getSema().getLangOpts()) && !ExistingI) {
+ if (!ExistingI) {
auto UniqueResult = Unique.insert(std::make_pair(D, I));
if (!UniqueResult.second) {
// We've seen this entity before.
ExistingI = UniqueResult.first->second;
}
}
if (ExistingI) {
// This is not a unique lookup result. Pick one of the results and
// discard the other.
if (isPreferredLookupResult(getSema(), getLookupKind(), Decls[I],
Decls[*ExistingI]))
Decls[*ExistingI] = Decls[I];
RemovedDecls.set(I);
continue;
}
// Otherwise, do some decl type analysis and then continue.
if (isa<UnresolvedUsingValueDecl>(D)) {
HasUnresolved = true;
} else if (isa<TagDecl>(D)) {
if (HasTag)
Ambiguous = true;
HasTag = true;
} else if (isa<FunctionTemplateDecl>(D)) {
HasFunction = true;
HasFunctionTemplate = true;
} else if (isa<FunctionDecl>(D)) {
HasFunction = true;
} else {
if (HasNonFunction) {
// If we're about to create an ambiguity between two declarations that
// are equivalent, but one is an internal linkage declaration from one
// module and the other is an internal linkage declaration from another
// module, just skip it.
if (getSema().isEquivalentInternalLinkageDeclaration(HasNonFunction,
D)) {
EquivalentNonFunctions.push_back(D);
RemovedDecls.set(I);
continue;
}
if (D->isPlaceholderVar(getSema().getLangOpts()) &&
getContextForScopeMatching(D) ==
getContextForScopeMatching(Decls[I])) {
ReferenceToPlaceHolderVariable = true;
}
Ambiguous = true;
}
HasNonFunction = D;
}
}
// FIXME: This diagnostic should really be delayed until we're done with
// the lookup result, in case the ambiguity is resolved by the caller.
if (!EquivalentNonFunctions.empty() && !Ambiguous)
getSema().diagnoseEquivalentInternalLinkageDeclarations(
getNameLoc(), HasNonFunction, EquivalentNonFunctions);
// Remove decls by replacing them with decls from the end (which
// means that we need to iterate from the end) and then truncating
// to the new size.
for (int I = RemovedDecls.find_last(); I >= 0; I = RemovedDecls.find_prev(I))
Decls[I] = Decls[--N];
Decls.truncate(N);
if ((HasNonFunction && (HasFunction || HasUnresolved)) ||
(HideTags && HasTag && (HasFunction || HasNonFunction || HasUnresolved)))
Ambiguous = true;
if (Ambiguous && ReferenceToPlaceHolderVariable)
setAmbiguous(LookupResult::AmbiguousReferenceToPlaceholderVariable);
else if (Ambiguous)
setAmbiguous(LookupResult::AmbiguousReference);
else if (HasUnresolved)
ResultKind = LookupResult::FoundUnresolvedValue;
else if (N > 1 || HasFunctionTemplate)
ResultKind = LookupResult::FoundOverloaded;
else
ResultKind = LookupResult::Found;
}
void LookupResult::addDeclsFromBasePaths(const CXXBasePaths &P) {
CXXBasePaths::const_paths_iterator I, E;
for (I = P.begin(), E = P.end(); I != E; ++I)
for (DeclContext::lookup_iterator DI = I->Decls, DE = DI.end(); DI != DE;
++DI)
addDecl(*DI);
}
void LookupResult::setAmbiguousBaseSubobjects(CXXBasePaths &P) {
Paths = new CXXBasePaths;
Paths->swap(P);
addDeclsFromBasePaths(*Paths);
resolveKind();
setAmbiguous(AmbiguousBaseSubobjects);
}
void LookupResult::setAmbiguousBaseSubobjectTypes(CXXBasePaths &P) {
Paths = new CXXBasePaths;
Paths->swap(P);
addDeclsFromBasePaths(*Paths);
resolveKind();
setAmbiguous(AmbiguousBaseSubobjectTypes);
}
void LookupResult::print(raw_ostream &Out) {
Out << Decls.size() << " result(s)";
if (isAmbiguous()) Out << ", ambiguous";
if (Paths) Out << ", base paths present";
for (iterator I = begin(), E = end(); I != E; ++I) {
Out << "\n";
(*I)->print(Out, 2);
}
}
LLVM_DUMP_METHOD void LookupResult::dump() {
llvm::errs() << "lookup results for " << getLookupName().getAsString()
<< ":\n";
for (NamedDecl *D : *this)
D->dump();
}
/// Diagnose a missing builtin type.
static QualType diagOpenCLBuiltinTypeError(Sema &S, llvm::StringRef TypeClass,
llvm::StringRef Name) {
S.Diag(SourceLocation(), diag::err_opencl_type_not_found)
<< TypeClass << Name;
return S.Context.VoidTy;
}
/// Lookup an OpenCL enum type.
static QualType getOpenCLEnumType(Sema &S, llvm::StringRef Name) {
LookupResult Result(S, &S.Context.Idents.get(Name), SourceLocation(),
Sema::LookupTagName);
S.LookupName(Result, S.TUScope);
if (Result.empty())
return diagOpenCLBuiltinTypeError(S, "enum", Name);
EnumDecl *Decl = Result.getAsSingle<EnumDecl>();
if (!Decl)
return diagOpenCLBuiltinTypeError(S, "enum", Name);
return S.Context.getEnumType(Decl);
}
/// Lookup an OpenCL typedef type.
static QualType getOpenCLTypedefType(Sema &S, llvm::StringRef Name) {
LookupResult Result(S, &S.Context.Idents.get(Name), SourceLocation(),
Sema::LookupOrdinaryName);
S.LookupName(Result, S.TUScope);
if (Result.empty())
return diagOpenCLBuiltinTypeError(S, "typedef", Name);
TypedefNameDecl *Decl = Result.getAsSingle<TypedefNameDecl>();
if (!Decl)
return diagOpenCLBuiltinTypeError(S, "typedef", Name);
return S.Context.getTypedefType(Decl);
}
/// Get the QualType instances of the return type and arguments for an OpenCL
/// builtin function signature.
/// \param S (in) The Sema instance.
/// \param OpenCLBuiltin (in) The signature currently handled.
/// \param GenTypeMaxCnt (out) Maximum number of types contained in a generic
/// type used as return type or as argument.
/// Only meaningful for generic types, otherwise equals 1.
/// \param RetTypes (out) List of the possible return types.
/// \param ArgTypes (out) List of the possible argument types. For each
/// argument, ArgTypes contains QualTypes for the Cartesian product
/// of (vector sizes) x (types).
static void GetQualTypesForOpenCLBuiltin(
Sema &S, const OpenCLBuiltinStruct &OpenCLBuiltin, unsigned &GenTypeMaxCnt,
SmallVector<QualType, 1> &RetTypes,
SmallVector<SmallVector<QualType, 1>, 5> &ArgTypes) {
// Get the QualType instances of the return types.
unsigned Sig = SignatureTable[OpenCLBuiltin.SigTableIndex];
OCL2Qual(S, TypeTable[Sig], RetTypes);
GenTypeMaxCnt = RetTypes.size();
// Get the QualType instances of the arguments.
// First type is the return type, skip it.
for (unsigned Index = 1; Index < OpenCLBuiltin.NumTypes; Index++) {
SmallVector<QualType, 1> Ty;
OCL2Qual(S, TypeTable[SignatureTable[OpenCLBuiltin.SigTableIndex + Index]],
Ty);
GenTypeMaxCnt = (Ty.size() > GenTypeMaxCnt) ? Ty.size() : GenTypeMaxCnt;
ArgTypes.push_back(std::move(Ty));
}
}
/// Create a list of the candidate function overloads for an OpenCL builtin
/// function.
/// \param Context (in) The ASTContext instance.
/// \param GenTypeMaxCnt (in) Maximum number of types contained in a generic
/// type used as return type or as argument.
/// Only meaningful for generic types, otherwise equals 1.
/// \param FunctionList (out) List of FunctionTypes.
/// \param RetTypes (in) List of the possible return types.
/// \param ArgTypes (in) List of the possible types for the arguments.
static void GetOpenCLBuiltinFctOverloads(
ASTContext &Context, unsigned GenTypeMaxCnt,
std::vector<QualType> &FunctionList, SmallVector<QualType, 1> &RetTypes,
SmallVector<SmallVector<QualType, 1>, 5> &ArgTypes) {
FunctionProtoType::ExtProtoInfo PI(
Context.getDefaultCallingConvention(false, false, true));
PI.Variadic = false;
// Do not attempt to create any FunctionTypes if there are no return types,
// which happens when a type belongs to a disabled extension.
if (RetTypes.size() == 0)
return;
// Create FunctionTypes for each (gen)type.
for (unsigned IGenType = 0; IGenType < GenTypeMaxCnt; IGenType++) {
SmallVector<QualType, 5> ArgList;
for (unsigned A = 0; A < ArgTypes.size(); A++) {
// Bail out if there is an argument that has no available types.
if (ArgTypes[A].size() == 0)
return;
// Builtins such as "max" have an "sgentype" argument that represents
// the corresponding scalar type of a gentype. The number of gentypes
// must be a multiple of the number of sgentypes.
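// Illustrative sketch (not part of the original source): for a builtin
// such as
//   gentype max(gentype x, sgentype y);
// with gentype = {float, float2, float4} and sgentype = {float}, the three
// generated overloads reuse the single scalar type via the modulo below.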
assert(GenTypeMaxCnt % ArgTypes[A].size() == 0 &&
"argument type count not compatible with gentype type count");
unsigned Idx = IGenType % ArgTypes[A].size();
ArgList.push_back(ArgTypes[A][Idx]);
}
FunctionList.push_back(Context.getFunctionType(
RetTypes[(RetTypes.size() != 1) ? IGenType : 0], ArgList, PI));
}
}
/// When trying to resolve a function name, if isOpenCLBuiltin() returns a
/// non-null <Index, Len> pair, then the name is referencing an OpenCL
/// builtin function. Add all candidate signatures to the LookupResult.
///
/// \param S (in) The Sema instance.
/// \param LR (inout) The LookupResult instance.
/// \param II (in) The identifier being resolved.
/// \param FctIndex (in) Starting index in the BuiltinTable.
/// \param Len (in) The signature list has Len elements.
static void InsertOCLBuiltinDeclarationsFromTable(Sema &S, LookupResult &LR,
IdentifierInfo *II,
const unsigned FctIndex,
const unsigned Len) {
// The builtin function declaration uses generic types (gentype).
bool HasGenType = false;
// Maximum number of types contained in a generic type used as return type or
// as argument. Only meaningful for generic types, otherwise equals 1.
unsigned GenTypeMaxCnt;
ASTContext &Context = S.Context;
for (unsigned SignatureIndex = 0; SignatureIndex < Len; SignatureIndex++) {
const OpenCLBuiltinStruct &OpenCLBuiltin =
BuiltinTable[FctIndex + SignatureIndex];
// Ignore this builtin function if it is not available in the currently
// selected language version.
if (!isOpenCLVersionContainedInMask(Context.getLangOpts(),
OpenCLBuiltin.Versions))
continue;
// Ignore this builtin function if it carries an extension macro that is
// not defined. This indicates that the extension is not supported by the
// target, so the builtin function should not be available.
StringRef Extensions = FunctionExtensionTable[OpenCLBuiltin.Extension];
if (!Extensions.empty()) {
SmallVector<StringRef, 2> ExtVec;
Extensions.split(ExtVec, " ");
bool AllExtensionsDefined = true;
for (StringRef Ext : ExtVec) {
if (!S.getPreprocessor().isMacroDefined(Ext)) {
AllExtensionsDefined = false;
break;
}
}
if (!AllExtensionsDefined)
continue;
}
SmallVector<QualType, 1> RetTypes;
SmallVector<SmallVector<QualType, 1>, 5> ArgTypes;
// Obtain QualType lists for the function signature.
GetQualTypesForOpenCLBuiltin(S, OpenCLBuiltin, GenTypeMaxCnt, RetTypes,
ArgTypes);
if (GenTypeMaxCnt > 1) {
HasGenType = true;
}
// Create function overload for each type combination.
std::vector<QualType> FunctionList;
GetOpenCLBuiltinFctOverloads(Context, GenTypeMaxCnt, FunctionList, RetTypes,
ArgTypes);
SourceLocation Loc = LR.getNameLoc();
DeclContext *Parent = Context.getTranslationUnitDecl();
FunctionDecl *NewOpenCLBuiltin;
for (const auto &FTy : FunctionList) {
NewOpenCLBuiltin = FunctionDecl::Create(
Context, Parent, Loc, Loc, II, FTy, /*TInfo=*/nullptr, SC_Extern,
S.getCurFPFeatures().isFPConstrained(), false,
FTy->isFunctionProtoType());
NewOpenCLBuiltin->setImplicit();
// Create Decl objects for each parameter, adding them to the
// FunctionDecl.
const auto *FP = cast<FunctionProtoType>(FTy);
SmallVector<ParmVarDecl *, 4> ParmList;
for (unsigned IParm = 0, e = FP->getNumParams(); IParm != e; ++IParm) {
ParmVarDecl *Parm = ParmVarDecl::Create(
Context, NewOpenCLBuiltin, SourceLocation(), SourceLocation(),
nullptr, FP->getParamType(IParm), nullptr, SC_None, nullptr);
Parm->setScopeInfo(0, IParm);
ParmList.push_back(Parm);
}
NewOpenCLBuiltin->setParams(ParmList);
// Add function attributes.
if (OpenCLBuiltin.IsPure)
NewOpenCLBuiltin->addAttr(PureAttr::CreateImplicit(Context));
if (OpenCLBuiltin.IsConst)
NewOpenCLBuiltin->addAttr(ConstAttr::CreateImplicit(Context));
if (OpenCLBuiltin.IsConv)
NewOpenCLBuiltin->addAttr(ConvergentAttr::CreateImplicit(Context));
if (!S.getLangOpts().OpenCLCPlusPlus)
NewOpenCLBuiltin->addAttr(OverloadableAttr::CreateImplicit(Context));
LR.addDecl(NewOpenCLBuiltin);
}
}
// If we added overloads, we need to resolve the lookup result.
if (Len > 1 || HasGenType)
LR.resolveKind();
}
bool Sema::LookupBuiltin(LookupResult &R) {
Sema::LookupNameKind NameKind = R.getLookupKind();
// If we didn't find a use of this identifier, and if the identifier
// corresponds to a compiler builtin, create the decl object for the builtin
// now, injecting it into translation unit scope, and return it.
if (NameKind == Sema::LookupOrdinaryName ||
NameKind == Sema::LookupRedeclarationWithLinkage) {
IdentifierInfo *II = R.getLookupName().getAsIdentifierInfo();
if (II) {
if (getLangOpts().CPlusPlus && NameKind == Sema::LookupOrdinaryName) {
if (II == getASTContext().getMakeIntegerSeqName()) {
R.addDecl(getASTContext().getMakeIntegerSeqDecl());
return true;
} else if (II == getASTContext().getTypePackElementName()) {
R.addDecl(getASTContext().getTypePackElementDecl());
return true;
}
}
// Check if this is an OpenCL Builtin, and if so, insert its overloads.
if (getLangOpts().OpenCL && getLangOpts().DeclareOpenCLBuiltins) {
auto Index = isOpenCLBuiltin(II->getName());
if (Index.first) {
InsertOCLBuiltinDeclarationsFromTable(*this, R, II, Index.first - 1,
Index.second);
return true;
}
}
if (RISCV().DeclareRVVBuiltins || RISCV().DeclareSiFiveVectorBuiltins) {
if (!RISCV().IntrinsicManager)
RISCV().IntrinsicManager = CreateRISCVIntrinsicManager(*this);
RISCV().IntrinsicManager->InitIntrinsicList();
if (RISCV().IntrinsicManager->CreateIntrinsicIfFound(R, II, PP))
return true;
}
// If this is a builtin on this (or all) targets, create the decl.
if (unsigned BuiltinID = II->getBuiltinID()) {
// In C++ and OpenCL (spec v1.2 s6.9.f), we don't have any predefined
// library functions like 'malloc'. Instead, we'll just error.
if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL) &&
Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
return false;
if (NamedDecl *D =
LazilyCreateBuiltin(II, BuiltinID, TUScope,
R.isForRedeclaration(), R.getNameLoc())) {
R.addDecl(D);
return true;
}
}
}
}
return false;
}
/// Looks up the declaration of "struct objc_super" and
/// saves it for later use in building the builtin declarations of
/// objc_msgSendSuper and objc_msgSendSuper_stret.
static void LookupPredefedObjCSuperType(Sema &Sema, Scope *S) {
ASTContext &Context = Sema.Context;
LookupResult Result(Sema, &Context.Idents.get("objc_super"), SourceLocation(),
Sema::LookupTagName);
Sema.LookupName(Result, S);
if (Result.getResultKind() == LookupResult::Found)
if (const TagDecl *TD = Result.getAsSingle<TagDecl>())
Context.setObjCSuperType(Context.getTagDeclType(TD));
}
void Sema::LookupNecessaryTypesForBuiltin(Scope *S, unsigned ID) {
if (ID == Builtin::BIobjc_msgSendSuper)
LookupPredefedObjCSuperType(*this, S);
}
/// Determine whether we can declare a special member function within
/// the class at this point.
static bool CanDeclareSpecialMemberFunction(const CXXRecordDecl *Class) {
// We need to have a definition for the class.
if (!Class->getDefinition() || Class->isDependentContext())
return false;
// We can't be in the middle of defining the class.
return !Class->isBeingDefined();
}
void Sema::ForceDeclarationOfImplicitMembers(CXXRecordDecl *Class) {
if (!CanDeclareSpecialMemberFunction(Class))
return;
// If the default constructor has not yet been declared, do so now.
if (Class->needsImplicitDefaultConstructor())
DeclareImplicitDefaultConstructor(Class);
// If the copy constructor has not yet been declared, do so now.
if (Class->needsImplicitCopyConstructor())
DeclareImplicitCopyConstructor(Class);
// If the copy assignment operator has not yet been declared, do so now.
if (Class->needsImplicitCopyAssignment())
DeclareImplicitCopyAssignment(Class);
if (getLangOpts().CPlusPlus11) {
// If the move constructor has not yet been declared, do so now.
if (Class->needsImplicitMoveConstructor())
DeclareImplicitMoveConstructor(Class);
// If the move assignment operator has not yet been declared, do so now.
if (Class->needsImplicitMoveAssignment())
DeclareImplicitMoveAssignment(Class);
}
// If the destructor has not yet been declared, do so now.
if (Class->needsImplicitDestructor())
DeclareImplicitDestructor(Class);
}
/// Determine whether this is the name of an implicitly-declared
/// special member function.
static bool isImplicitlyDeclaredMemberFunctionName(DeclarationName Name) {
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
return true;
case DeclarationName::CXXOperatorName:
return Name.getCXXOverloadedOperator() == OO_Equal;
default:
break;
}
return false;
}
/// If there are any implicit member functions with the given name
/// that need to be declared in the given declaration context, do so.
static void DeclareImplicitMemberFunctionsWithName(Sema &S,
DeclarationName Name,
SourceLocation Loc,
const DeclContext *DC) {
if (!DC)
return;
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC))
if (Record->getDefinition() && CanDeclareSpecialMemberFunction(Record)) {
CXXRecordDecl *Class = const_cast<CXXRecordDecl *>(Record);
if (Record->needsImplicitDefaultConstructor())
S.DeclareImplicitDefaultConstructor(Class);
if (Record->needsImplicitCopyConstructor())
S.DeclareImplicitCopyConstructor(Class);
if (S.getLangOpts().CPlusPlus11 &&
Record->needsImplicitMoveConstructor())
S.DeclareImplicitMoveConstructor(Class);
}
break;
case DeclarationName::CXXDestructorName:
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC))
if (Record->getDefinition() && Record->needsImplicitDestructor() &&
CanDeclareSpecialMemberFunction(Record))
S.DeclareImplicitDestructor(const_cast<CXXRecordDecl *>(Record));
break;
case DeclarationName::CXXOperatorName:
if (Name.getCXXOverloadedOperator() != OO_Equal)
break;
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC)) {
if (Record->getDefinition() && CanDeclareSpecialMemberFunction(Record)) {
CXXRecordDecl *Class = const_cast<CXXRecordDecl *>(Record);
if (Record->needsImplicitCopyAssignment())
S.DeclareImplicitCopyAssignment(Class);
if (S.getLangOpts().CPlusPlus11 &&
Record->needsImplicitMoveAssignment())
S.DeclareImplicitMoveAssignment(Class);
}
}
break;
case DeclarationName::CXXDeductionGuideName:
S.DeclareImplicitDeductionGuides(Name.getCXXDeductionGuideTemplate(), Loc);
break;
default:
break;
}
}
// Adds all qualifying matches for a name within a decl context to the
// given lookup result. Returns true if any matches were found.
static bool LookupDirect(Sema &S, LookupResult &R, const DeclContext *DC) {
bool Found = false;
// Lazily declare C++ special member functions.
if (S.getLangOpts().CPlusPlus)
DeclareImplicitMemberFunctionsWithName(S, R.getLookupName(), R.getNameLoc(),
DC);
// Perform lookup into this declaration context.
DeclContext::lookup_result DR = DC->lookup(R.getLookupName());
for (NamedDecl *D : DR) {
if ((D = R.getAcceptableDecl(D))) {
R.addDecl(D);
Found = true;
}
}
if (!Found && DC->isTranslationUnit() && S.LookupBuiltin(R))
return true;
if (R.getLookupName().getNameKind()
!= DeclarationName::CXXConversionFunctionName ||
R.getLookupName().getCXXNameType()->isDependentType() ||
!isa<CXXRecordDecl>(DC))
return Found;
// C++ [temp.mem]p6:
// A specialization of a conversion function template is not found by
// name lookup. Instead, any conversion function templates visible in the
// context of the use are considered. [...]
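// Illustrative example (not part of the original source):
//   struct X { template <typename T> operator T(); };
//   int i = X(); // deduction against 'operator int' produces the candidate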
const CXXRecordDecl *Record = cast<CXXRecordDecl>(DC);
if (!Record->isCompleteDefinition())
return Found;
// For conversion operators, 'operator auto' should only match
// 'operator auto'. Since 'auto' is not a type, it shouldn't be considered
// as a candidate for template substitution.
auto *ContainedDeducedType =
R.getLookupName().getCXXNameType()->getContainedDeducedType();
if (R.getLookupName().getNameKind() ==
DeclarationName::CXXConversionFunctionName &&
ContainedDeducedType && ContainedDeducedType->isUndeducedType())
return Found;
for (CXXRecordDecl::conversion_iterator U = Record->conversion_begin(),
UEnd = Record->conversion_end(); U != UEnd; ++U) {
FunctionTemplateDecl *ConvTemplate = dyn_cast<FunctionTemplateDecl>(*U);
if (!ConvTemplate)
continue;
// When we're performing lookup for the purposes of redeclaration, just
// add the conversion function template. When we deduce template
// arguments for specializations, we'll end up unifying the return
// type of the new declaration with the type of the function template.
if (R.isForRedeclaration()) {
R.addDecl(ConvTemplate);
Found = true;
continue;
}
// C++ [temp.mem]p6:
// [...] For each such operator, if argument deduction succeeds
// (14.9.2.3), the resulting specialization is used as if found by
// name lookup.
//
// When referencing a conversion function for any purpose other than
// a redeclaration (such that we'll be building an expression with the
// result), perform template argument deduction and place the
// specialization into the result set. We do this to avoid forcing all
// callers to perform special deduction for conversion functions.
TemplateDeductionInfo Info(R.getNameLoc());
FunctionDecl *Specialization = nullptr;
const FunctionProtoType *ConvProto
= ConvTemplate->getTemplatedDecl()->getType()->getAs<FunctionProtoType>();
assert(ConvProto && "Nonsensical conversion function template type");
// Compute the type of the function that we would expect the conversion
// function to have, if it were to match the name given.
// FIXME: Calling convention!
FunctionProtoType::ExtProtoInfo EPI = ConvProto->getExtProtoInfo();
EPI.ExtInfo = EPI.ExtInfo.withCallingConv(CC_C);
EPI.ExceptionSpec = EST_None;
QualType ExpectedType = R.getSema().Context.getFunctionType(
R.getLookupName().getCXXNameType(), std::nullopt, EPI);
// Perform template argument deduction against the type that we would
// expect the function to have.
if (R.getSema().DeduceTemplateArguments(ConvTemplate, nullptr, ExpectedType,
Specialization, Info) ==
TemplateDeductionResult::Success) {
R.addDecl(Specialization);
Found = true;
}
}
return Found;
}
// Performs C++ unqualified lookup into the given file context.
static bool CppNamespaceLookup(Sema &S, LookupResult &R, ASTContext &Context,
const DeclContext *NS,
UnqualUsingDirectiveSet &UDirs) {
assert(NS && NS->isFileContext() && "CppNamespaceLookup() requires namespace!");
// Perform direct name lookup into the LookupCtx.
bool Found = LookupDirect(S, R, NS);
// Perform direct name lookup into the namespaces nominated by the
// using directives whose common ancestor is this namespace.
for (const UnqualUsingEntry &UUE : UDirs.getNamespacesFor(NS))
if (LookupDirect(S, R, UUE.getNominatedNamespace()))
Found = true;
R.resolveKind();
return Found;
}
static bool isNamespaceOrTranslationUnitScope(Scope *S) {
if (DeclContext *Ctx = S->getEntity())
return Ctx->isFileContext();
return false;
}
/// Find the outer declaration context from this scope. This indicates the
/// context that we should search up to (exclusive) before considering the
/// parent of the specified scope.
static DeclContext *findOuterContext(Scope *S) {
for (Scope *OuterS = S->getParent(); OuterS; OuterS = OuterS->getParent())
if (DeclContext *DC = OuterS->getLookupEntity())
return DC;
return nullptr;
}
namespace {
/// An RAII object to specify that we want to find block scope extern
/// declarations.
struct FindLocalExternScope {
FindLocalExternScope(LookupResult &R)
: R(R), OldFindLocalExtern(R.getIdentifierNamespace() &
Decl::IDNS_LocalExtern) {
R.setFindLocalExtern(R.getIdentifierNamespace() &
(Decl::IDNS_Ordinary | Decl::IDNS_NonMemberOperator));
}
void restore() {
R.setFindLocalExtern(OldFindLocalExtern);
}
~FindLocalExternScope() {
restore();
}
LookupResult &R;
bool OldFindLocalExtern;
};
} // end anonymous namespace
bool Sema::CppLookupName(LookupResult &R, Scope *S) {
assert(getLangOpts().CPlusPlus && "Can perform only C++ lookup");
DeclarationName Name = R.getLookupName();
Sema::LookupNameKind NameKind = R.getLookupKind();
// If this is the name of an implicitly-declared special member function,
// go through the scope stack to implicitly declare them where needed.
if (isImplicitlyDeclaredMemberFunctionName(Name)) {
for (Scope *PreS = S; PreS; PreS = PreS->getParent())
if (DeclContext *DC = PreS->getEntity())
DeclareImplicitMemberFunctionsWithName(*this, Name, R.getNameLoc(), DC);
}
// C++23 [temp.dep.general]p2:
// The component name of an unqualified-id is dependent if
// - it is a conversion-function-id whose conversion-type-id
// is dependent, or
// - it is operator= and the current class is a templated entity, or
// - the unqualified-id is the postfix-expression in a dependent call.
if (Name.getNameKind() == DeclarationName::CXXConversionFunctionName &&
Name.getCXXNameType()->isDependentType()) {
R.setNotFoundInCurrentInstantiation();
return false;
}
// Implicitly declare member functions with the name we're looking for, if in
// fact we are in a scope where it matters.
Scope *Initial = S;
IdentifierResolver::iterator
I = IdResolver.begin(Name),
IEnd = IdResolver.end();
// First we lookup local scope.
// We don't consider using-directives, as per 7.3.4.p1 [namespace.udir]
// ...During unqualified name lookup (3.4.1), the names appear as if
// they were declared in the nearest enclosing namespace which contains
// both the using-directive and the nominated namespace.
// [Note: in this context, "contains" means "contains directly or
// indirectly".
//
// For example:
// namespace A { int i; }
// void foo() {
// int i;
// {
// using namespace A;
// ++i; // finds local 'i', A::i appears at global scope
// }
// }
//
UnqualUsingDirectiveSet UDirs(*this);
bool VisitedUsingDirectives = false;
bool LeftStartingScope = false;
// When performing a scope lookup, we want to find local extern decls.
FindLocalExternScope FindLocals(R);
for (; S && !isNamespaceOrTranslationUnitScope(S); S = S->getParent()) {
bool SearchNamespaceScope = true;
// Check whether the IdResolver has anything in this scope.
for (; I != IEnd && S->isDeclScope(*I); ++I) {
if (NamedDecl *ND = R.getAcceptableDecl(*I)) {
if (NameKind == LookupRedeclarationWithLinkage &&
!(*I)->isTemplateParameter()) {
// If it's a template parameter, we still find it, so we can diagnose
// the invalid redeclaration.
// Determine whether this (or a previous) declaration is
// out-of-scope.
if (!LeftStartingScope && !Initial->isDeclScope(*I))
LeftStartingScope = true;
// If we found something outside of our starting scope that
// does not have linkage, skip it.
if (LeftStartingScope && !((*I)->hasLinkage())) {
R.setShadowed();
continue;
}
} else {
// We found something in this scope, so we should not look at the
// namespace scope.
SearchNamespaceScope = false;
}
R.addDecl(ND);
}
}
if (!SearchNamespaceScope) {
R.resolveKind();
if (S->isClassScope())
if (auto *Record = dyn_cast_if_present<CXXRecordDecl>(S->getEntity()))
R.setNamingClass(Record);
return true;
}
if (NameKind == LookupLocalFriendName && !S->isClassScope()) {
// C++11 [class.friend]p11:
// If a friend declaration appears in a local class and the name
// specified is an unqualified name, a prior declaration is
// looked up without considering scopes that are outside the
// innermost enclosing non-class scope.
return false;
}
if (DeclContext *Ctx = S->getLookupEntity()) {
DeclContext *OuterCtx = findOuterContext(S);
for (; Ctx && !Ctx->Equals(OuterCtx); Ctx = Ctx->getLookupParent()) {
// We do not directly look into transparent contexts, since
// those entities will be found in the nearest enclosing
// non-transparent context.
if (Ctx->isTransparentContext())
continue;
// We do not look directly into function or method contexts,
// since all of the local variables and parameters of the
// function/method are present within the Scope.
if (Ctx->isFunctionOrMethod()) {
// If we have an Objective-C instance method, look for ivars
// in the corresponding interface.
if (ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(Ctx)) {
if (Method->isInstanceMethod() && Name.getAsIdentifierInfo())
if (ObjCInterfaceDecl *Class = Method->getClassInterface()) {
ObjCInterfaceDecl *ClassDeclared;
if (ObjCIvarDecl *Ivar = Class->lookupInstanceVariable(
Name.getAsIdentifierInfo(),
ClassDeclared)) {
if (NamedDecl *ND = R.getAcceptableDecl(Ivar)) {
R.addDecl(ND);
R.resolveKind();
return true;
}
}
}
}
continue;
}
// If this is a file context, we need to perform unqualified name
// lookup considering using directives.
if (Ctx->isFileContext()) {
// If we haven't handled using directives yet, do so now.
if (!VisitedUsingDirectives) {
// Add using directives from this context up to the top level.
for (DeclContext *UCtx = Ctx; UCtx; UCtx = UCtx->getParent()) {
if (UCtx->isTransparentContext())
continue;
UDirs.visit(UCtx, UCtx);
}
// Find the innermost file scope, so we can add using directives
// from local scopes.
Scope *InnermostFileScope = S;
while (InnermostFileScope &&
!isNamespaceOrTranslationUnitScope(InnermostFileScope))
InnermostFileScope = InnermostFileScope->getParent();
UDirs.visitScopeChain(Initial, InnermostFileScope);
UDirs.done();
VisitedUsingDirectives = true;
}
if (CppNamespaceLookup(*this, R, Context, Ctx, UDirs)) {
R.resolveKind();
return true;
}
continue;
}
// Perform qualified name lookup into this context.
// FIXME: In some cases, we know that every name that could be found by
// this qualified name lookup will also be on the identifier chain. For
// example, inside a class without any base classes, we never need to
// perform qualified lookup because all of the members are on top of the
// identifier chain.
if (LookupQualifiedName(R, Ctx, /*InUnqualifiedLookup=*/true))
return true;
}
}
}
// Stop if we ran out of scopes.
// FIXME: This really, really shouldn't be happening.
if (!S) return false;
// If we are looking for members, no need to look into global/namespace scope.
if (NameKind == LookupMemberName)
return false;
// Collect UsingDirectiveDecls in all scopes, and, recursively, all
// namespaces nominated by those using-directives.
//
// FIXME: Cache this sorted list in Scope structure, and DeclContext, so we
// don't build it for each lookup!
if (!VisitedUsingDirectives) {
UDirs.visitScopeChain(Initial, S);
UDirs.done();
}
// If we're not performing redeclaration lookup, do not look for local
// extern declarations outside of a function scope.
if (!R.isForRedeclaration())
FindLocals.restore();
// Look up namespace scope and global scope.
// Unqualified name lookup in C++ requires looking into scopes
// that aren't strictly lexical, and therefore we walk through the
// context as well as walking through the scopes.
for (; S; S = S->getParent()) {
// Check whether the IdResolver has anything in this scope.
bool Found = false;
for (; I != IEnd && S->isDeclScope(*I); ++I) {
if (NamedDecl *ND = R.getAcceptableDecl(*I)) {
// We found something. Look for anything else in our scope
// with this same name and in an acceptable identifier
// namespace, so that we can construct an overload set if we
// need to.
Found = true;
R.addDecl(ND);
}
}
if (Found && S->isTemplateParamScope()) {
R.resolveKind();
return true;
}
DeclContext *Ctx = S->getLookupEntity();
if (Ctx) {
DeclContext *OuterCtx = findOuterContext(S);
for (; Ctx && !Ctx->Equals(OuterCtx); Ctx = Ctx->getLookupParent()) {
// We do not directly look into transparent contexts, since
// those entities will be found in the nearest enclosing
// non-transparent context.
if (Ctx->isTransparentContext())
continue;
// If we have a context, and it's not a context stashed in the
// template parameter scope for an out-of-line definition, also
// look into that context.
if (!(Found && S->isTemplateParamScope())) {
assert(Ctx->isFileContext() &&
"We should have been looking only at file context here already.");
// Look into context considering using-directives.
if (CppNamespaceLookup(*this, R, Context, Ctx, UDirs))
Found = true;
}
if (Found) {
R.resolveKind();
return true;
}
if (R.isForRedeclaration() && !Ctx->isTransparentContext())
return false;
}
}
if (R.isForRedeclaration() && Ctx && !Ctx->isTransparentContext())
return false;
}
return !R.empty();
}
void Sema::makeMergedDefinitionVisible(NamedDecl *ND) {
if (auto *M = getCurrentModule())
Context.mergeDefinitionIntoModule(ND, M);
else
// We're not building a module; just make the definition visible.
ND->setVisibleDespiteOwningModule();
// If ND is a template declaration, make the template parameters
// visible too. They're not (necessarily) within a mergeable DeclContext.
if (auto *TD = dyn_cast<TemplateDecl>(ND))
for (auto *Param : *TD->getTemplateParameters())
makeMergedDefinitionVisible(Param);
}
/// Find the module in which the given declaration was defined.
static Module *getDefiningModule(Sema &S, Decl *Entity) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Entity)) {
// If this function was instantiated from a template, the defining module is
// the module containing the pattern.
if (FunctionDecl *Pattern = FD->getTemplateInstantiationPattern())
Entity = Pattern;
} else if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(Entity)) {
if (CXXRecordDecl *Pattern = RD->getTemplateInstantiationPattern())
Entity = Pattern;
} else if (EnumDecl *ED = dyn_cast<EnumDecl>(Entity)) {
if (auto *Pattern = ED->getTemplateInstantiationPattern())
Entity = Pattern;
} else if (VarDecl *VD = dyn_cast<VarDecl>(Entity)) {
if (VarDecl *Pattern = VD->getTemplateInstantiationPattern())
Entity = Pattern;
}
// Walk up to the containing context. That might also have been instantiated
// from a template.
DeclContext *Context = Entity->getLexicalDeclContext();
if (Context->isFileContext())
return S.getOwningModule(Entity);
return getDefiningModule(S, cast<Decl>(Context));
}
llvm::DenseSet<Module*> &Sema::getLookupModules() {
unsigned N = CodeSynthesisContexts.size();
for (unsigned I = CodeSynthesisContextLookupModules.size();
I != N; ++I) {
Module *M = CodeSynthesisContexts[I].Entity ?
getDefiningModule(*this, CodeSynthesisContexts[I].Entity) :
nullptr;
if (M && !LookupModulesCache.insert(M).second)
M = nullptr;
CodeSynthesisContextLookupModules.push_back(M);
}
return LookupModulesCache;
}
bool Sema::isUsableModule(const Module *M) {
assert(M && "We shouldn't check nullness for module here");
// Return quickly if we cached the result.
if (UsableModuleUnitsCache.count(M))
return true;
// If M is the global module fragment of the current translation unit, it
// should be usable.
// [module.global.frag]p1:
// The global module fragment can be used to provide declarations that are
// attached to the global module and usable within the module unit.
if (M == TheGlobalModuleFragment || M == TheImplicitGlobalModuleFragment) {
UsableModuleUnitsCache.insert(M);
return true;
}
// Otherwise, a global module fragment from another translation unit is not
// directly usable.
if (M->isGlobalModule())
return false;
Module *Current = getCurrentModule();
// If we're not parsing a module, we can't use all the declarations from
// another module easily.
if (!Current)
return false;
// If M is the module we're parsing, or if M and the current module unit live
// in the same module, M should be usable.
//
// Note: It should be fine to search the vector `ModuleScopes` linearly, since
// it is generally small; module fragments are rare in a named module unit.
if (llvm::count_if(ModuleScopes,
[&M](const ModuleScope &MS) { return MS.Module == M; }) ||
getASTContext().isInSameModule(M, Current)) {
UsableModuleUnitsCache.insert(M);
return true;
}
return false;
}
bool Sema::hasVisibleMergedDefinition(const NamedDecl *Def) {
for (const Module *Merged : Context.getModulesWithMergedDefinition(Def))
if (isModuleVisible(Merged))
return true;
return false;
}
bool Sema::hasMergedDefinitionInCurrentModule(const NamedDecl *Def) {
for (const Module *Merged : Context.getModulesWithMergedDefinition(Def))
if (isUsableModule(Merged))
return true;
return false;
}
template <typename ParmDecl>
static bool
hasAcceptableDefaultArgument(Sema &S, const ParmDecl *D,
llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
if (!D->hasDefaultArgument())
return false;
llvm::SmallPtrSet<const ParmDecl *, 4> Visited;
while (D && Visited.insert(D).second) {
auto &DefaultArg = D->getDefaultArgStorage();
if (!DefaultArg.isInherited() && S.isAcceptable(D, Kind))
return true;
if (!DefaultArg.isInherited() && Modules) {
auto *NonConstD = const_cast<ParmDecl*>(D);
Modules->push_back(S.getOwningModule(NonConstD));
}
// If there was a previous default argument, maybe its parameter is
// acceptable.
D = DefaultArg.getInheritedFrom();
}
return false;
}
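// Illustrative sketch (not from the original source): default arguments can
// be inherited across redeclarations, e.g.
// ```
// template <typename T = int> struct A; // owns the default argument
// template <typename T> struct A {};    // inherits it from the first decl
// ```
// so the loop above follows DefaultArg.getInheritedFrom() until it finds a
// parameter whose non-inherited default argument is acceptable.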
bool Sema::hasAcceptableDefaultArgument(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
if (auto *P = dyn_cast<TemplateTypeParmDecl>(D))
return ::hasAcceptableDefaultArgument(*this, P, Modules, Kind);
if (auto *P = dyn_cast<NonTypeTemplateParmDecl>(D))
return ::hasAcceptableDefaultArgument(*this, P, Modules, Kind);
return ::hasAcceptableDefaultArgument(
*this, cast<TemplateTemplateParmDecl>(D), Modules, Kind);
}
bool Sema::hasVisibleDefaultArgument(const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableDefaultArgument(D, Modules,
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableDefaultArgument(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableDefaultArgument(D, Modules,
Sema::AcceptableKind::Reachable);
}
template <typename Filter>
static bool
hasAcceptableDeclarationImpl(Sema &S, const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules, Filter F,
Sema::AcceptableKind Kind) {
bool HasFilteredRedecls = false;
for (auto *Redecl : D->redecls()) {
auto *R = cast<NamedDecl>(Redecl);
if (!F(R))
continue;
if (S.isAcceptable(R, Kind))
return true;
HasFilteredRedecls = true;
if (Modules)
Modules->push_back(R->getOwningModule());
}
// Only return false if at least one redecl survived the filter; if every
// redecl was filtered out, report success.
if (HasFilteredRedecls)
return false;
return true;
}
static bool
hasAcceptableExplicitSpecialization(Sema &S, const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
return hasAcceptableDeclarationImpl(
S, D, Modules,
[](const NamedDecl *D) {
if (auto *RD = dyn_cast<CXXRecordDecl>(D))
return RD->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization;
if (auto *FD = dyn_cast<FunctionDecl>(D))
return FD->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization;
if (auto *VD = dyn_cast<VarDecl>(D))
return VD->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization;
llvm_unreachable("unknown explicit specialization kind");
},
Kind);
}
bool Sema::hasVisibleExplicitSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return ::hasAcceptableExplicitSpecialization(*this, D, Modules,
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableExplicitSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return ::hasAcceptableExplicitSpecialization(*this, D, Modules,
Sema::AcceptableKind::Reachable);
}
static bool
hasAcceptableMemberSpecialization(Sema &S, const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
assert(isa<CXXRecordDecl>(D->getDeclContext()) &&
"not a member specialization");
return hasAcceptableDeclarationImpl(
S, D, Modules,
[](const NamedDecl *D) {
// If the specialization is declared at namespace scope, then it's a
// member specialization declaration. If it's lexically inside the class
// definition then it was instantiated.
//
// FIXME: This is a hack. There should be a better way to determine
// this.
// FIXME: What about MS-style explicit specializations declared within a
// class definition?
return D->getLexicalDeclContext()->isFileContext();
},
Kind);
}
bool Sema::hasVisibleMemberSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableMemberSpecialization(*this, D, Modules,
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableMemberSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableMemberSpecialization(*this, D, Modules,
Sema::AcceptableKind::Reachable);
}
/// Determine whether a declaration is acceptable to name lookup.
///
/// This routine determines whether the declaration D is acceptable in the
/// current lookup context, taking into account the current template
/// instantiation stack. During template instantiation, a declaration is
/// acceptable if it is acceptable from a module containing any entity on the
/// template instantiation path (by instantiating a template, you allow it to
/// see the declarations that your module can see, including those later on in
/// your module).
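///
/// Illustrative sketch (not part of the original comment): if module A
/// defines class C and a template that mentions C, then instantiating that
/// template from module B may name C even though B never imported A,
/// because A lies on the instantiation path.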
bool LookupResult::isAcceptableSlow(Sema &SemaRef, NamedDecl *D,
Sema::AcceptableKind Kind) {
assert(!D->isUnconditionallyVisible() &&
"should not call this: not in slow case");
Module *DeclModule = SemaRef.getOwningModule(D);
assert(DeclModule && "hidden decl has no owning module");
// If the owning module is visible, the decl is acceptable.
if (SemaRef.isModuleVisible(DeclModule,
D->isInvisibleOutsideTheOwningModule()))
return true;
// Determine whether a decl context is a file context for the purpose of
// visibility/reachability. This looks through some (export and linkage spec)
// transparent contexts, but not others (enums).
auto IsEffectivelyFileContext = [](const DeclContext *DC) {
return DC->isFileContext() || isa<LinkageSpecDecl>(DC) ||
isa<ExportDecl>(DC);
};
// If this declaration is not at namespace scope,
// then it is acceptable if its lexical parent has an acceptable definition.
DeclContext *DC = D->getLexicalDeclContext();
if (DC && !IsEffectivelyFileContext(DC)) {
// For a parameter, check whether our current template declaration's
// lexical context is acceptable, not whether there's some other acceptable
// definition of it, because parameters aren't "within" the definition.
//
// In C++ we need to check for an acceptable definition due to ODR merging,
// and in C we must not because each declaration of a function gets its own
// set of declarations for tags in prototype scope.
bool AcceptableWithinParent;
if (D->isTemplateParameter()) {
bool SearchDefinitions = true;
if (const auto *DCD = dyn_cast<Decl>(DC)) {
if (const auto *TD = DCD->getDescribedTemplate()) {
TemplateParameterList *TPL = TD->getTemplateParameters();
auto Index = getDepthAndIndex(D).second;
SearchDefinitions = Index >= TPL->size() || TPL->getParam(Index) != D;
}
}
if (SearchDefinitions)
AcceptableWithinParent =
SemaRef.hasAcceptableDefinition(cast<NamedDecl>(DC), Kind);
else
AcceptableWithinParent =
isAcceptable(SemaRef, cast<NamedDecl>(DC), Kind);
} else if (isa<ParmVarDecl>(D) ||
(isa<FunctionDecl>(DC) && !SemaRef.getLangOpts().CPlusPlus))
AcceptableWithinParent = isAcceptable(SemaRef, cast<NamedDecl>(DC), Kind);
else if (D->isModulePrivate()) {
// A module-private declaration is only acceptable if an enclosing lexical
// parent was merged with another definition in the current module.
AcceptableWithinParent = false;
do {
if (SemaRef.hasMergedDefinitionInCurrentModule(cast<NamedDecl>(DC))) {
AcceptableWithinParent = true;
break;
}
DC = DC->getLexicalParent();
} while (!IsEffectivelyFileContext(DC));
} else {
AcceptableWithinParent =
SemaRef.hasAcceptableDefinition(cast<NamedDecl>(DC), Kind);
}
if (AcceptableWithinParent && SemaRef.CodeSynthesisContexts.empty() &&
Kind == Sema::AcceptableKind::Visible &&
// FIXME: Do something better in this case.
!SemaRef.getLangOpts().ModulesLocalVisibility) {
// Cache the fact that this declaration is implicitly visible because
// its parent has a visible definition.
D->setVisibleDespiteOwningModule();
}
return AcceptableWithinParent;
}
if (Kind == Sema::AcceptableKind::Visible)
return false;
assert(Kind == Sema::AcceptableKind::Reachable &&
"Additional Sema::AcceptableKind?");
return isReachableSlow(SemaRef, D);
}
bool Sema::isModuleVisible(const Module *M, bool ModulePrivate) {
// The module might be ordinarily visible. For a module-private query, that
// means it is part of the current module.
if (ModulePrivate && isUsableModule(M))
return true;
// For a query which is not module-private, that means it is in our visible
// module set.
if (!ModulePrivate && VisibleModules.isVisible(M))
return true;
// Otherwise, it might be visible by virtue of the query being within a
// template instantiation or similar that is permitted to look inside M.
// Find the extra places where we need to look.
const auto &LookupModules = getLookupModules();
if (LookupModules.empty())
return false;
// If our lookup set contains the module, it's visible.
if (LookupModules.count(M))
return true;
// A global module fragment is visible to its corresponding module unit, so
// the global module fragment should be visible if its corresponding module
// unit is visible.
if (M->isGlobalModule() && LookupModules.count(M->getTopLevelModule()))
return true;
// For a module-private query, that's everywhere we get to look.
if (ModulePrivate)
return false;
// Check whether M is transitively exported to an import of the lookup set.
return llvm::any_of(LookupModules, [&](const Module *LookupM) {
return LookupM->isModuleVisible(M);
});
}
// FIXME: Return false directly if we don't have an interface dependency on the
// translation unit containing D.
bool LookupResult::isReachableSlow(Sema &SemaRef, NamedDecl *D) {
assert(!isVisible(SemaRef, D) && "Shouldn't call the slow case.\n");
Module *DeclModule = SemaRef.getOwningModule(D);
assert(DeclModule && "hidden decl has no owning module");
// Entities in header-like modules are reachable only if they're visible.
if (DeclModule->isHeaderLikeModule())
return false;
if (!D->isInAnotherModuleUnit())
return true;
// [module.reach]/p3:
// A declaration D is reachable from a point P if:
// ...
// - D is not discarded ([module.global.frag]), appears in a translation unit
// that is reachable from P, and does not appear within a private module
// fragment.
//
// A declaration that's discarded in the GMF should be module-private.
if (D->isModulePrivate())
return false;
// [module.reach]/p1
// A translation unit U is necessarily reachable from a point P if U is a
// module interface unit on which the translation unit containing P has an
// interface dependency, or the translation unit containing P imports U, in
// either case prior to P ([module.import]).
//
// [module.import]/p10
// A translation unit has an interface dependency on a translation unit U if
// it contains a declaration (possibly a module-declaration) that imports U
// or if it has an interface dependency on a translation unit that has an
// interface dependency on U.
//
// So we can conclude that the module unit U is necessarily reachable if:
// (1) The module unit U is a module interface unit.
// (2) The current unit has an interface dependency on the module unit U.
//
// Here we only check the first condition, since we couldn't see DeclModule
// at all unless it were (transitively) imported.
if (DeclModule->getTopLevelModule()->isModuleInterfaceUnit())
return true;
// [module.reach]/p2
// Additional translation units on
// which the point within the program has an interface dependency may be
// considered reachable, but it is unspecified which are and under what
// circumstances.
//
// The decision here is to treat all additional translation units as
// unreachable.
return false;
}
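// Illustrative sketch (not from the original source), following
// [module.reach]:
// ```
// // a.cppm
// export module a;
// struct X { int v; };   // not exported
// export X make();
// // use.cpp
// import a;
// int n = make().v;      // OK: X's definition is reachable
// ```
// X is not visible in use.cpp, but its definition is reachable because
// module a is an interface unit that use.cpp imports.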
bool Sema::isAcceptableSlow(const NamedDecl *D, Sema::AcceptableKind Kind) {
return LookupResult::isAcceptable(*this, const_cast<NamedDecl *>(D), Kind);
}
bool Sema::shouldLinkPossiblyHiddenDecl(LookupResult &R, const NamedDecl *New) {
// FIXME: If there are both visible and hidden declarations, we need to take
// into account whether redeclaration is possible. Example:
//
// Non-imported module:
// int f(T); // #1
// Some TU:
// static int f(U); // #2, not a redeclaration of #1
// int f(T); // #3, finds both, should link with #1 if T != U, but
// // with #2 if T == U; neither should be ambiguous.
for (auto *D : R) {
if (isVisible(D))
return true;
assert(D->isExternallyDeclarable() &&
"should not have hidden, non-externally-declarable result here");
}
// This function is called once "New" is essentially complete, but before a
// previous declaration is attached. We can't query the linkage of "New" in
// general, because attaching the previous declaration can change the
// linkage of New to match the previous declaration.
//
// However, because we've just determined that there is no *visible* prior
// declaration, we can compute the linkage here. There are two possibilities:
//
// * This is not a redeclaration; it's safe to compute the linkage now.
//
// * This is a redeclaration of a prior declaration that is externally
// redeclarable. In that case, the linkage of the declaration is not
// changed by attaching the prior declaration, because both are externally
// declarable (and thus ExternalLinkage or VisibleNoLinkage).
//
// FIXME: This is subtle and fragile.
return New->isExternallyDeclarable();
}
/// Retrieve the visible declaration corresponding to D, if any.
///
/// This routine determines whether the declaration D is visible in the current
/// module, with the current imports. If not, it checks whether any
/// redeclaration of D is visible, and if so, returns that declaration.
///
/// \returns D, or a visible previous declaration of D, whichever is more recent
/// and visible. If no declaration of D is visible, returns null.
static NamedDecl *findAcceptableDecl(Sema &SemaRef, NamedDecl *D,
unsigned IDNS) {
assert(!LookupResult::isAvailableForLookup(SemaRef, D) && "not in slow case");
for (auto *RD : D->redecls()) {
// Don't bother with extra checks if we already know this one isn't visible.
if (RD == D)
continue;
auto ND = cast<NamedDecl>(RD);
// FIXME: This is wrong in the case where the previous declaration is not
// visible in the same scope as D. This needs to be done much more
// carefully.
if (ND->isInIdentifierNamespace(IDNS) &&
LookupResult::isAvailableForLookup(SemaRef, ND))
return ND;
}
return nullptr;
}
bool Sema::hasVisibleDeclarationSlow(const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules) {
assert(!isVisible(D) && "not in slow case");
return hasAcceptableDeclarationImpl(
*this, D, Modules, [](const NamedDecl *) { return true; },
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableDeclarationSlow(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
assert(!isReachable(D) && "not in slow case");
return hasAcceptableDeclarationImpl(
*this, D, Modules, [](const NamedDecl *) { return true; },
Sema::AcceptableKind::Reachable);
}
NamedDecl *LookupResult::getAcceptableDeclSlow(NamedDecl *D) const {
if (auto *ND = dyn_cast<NamespaceDecl>(D)) {
// Namespaces are a bit of a special case: we expect there to be a lot of
// redeclarations of some namespaces, all declarations of a namespace are
// essentially interchangeable, all declarations are found by name lookup
// if any is, and namespaces are never looked up during template
// instantiation. So we benefit from caching the check in this case, and
// it is correct to do so.
auto *Key = ND->getCanonicalDecl();
if (auto *Acceptable = getSema().VisibleNamespaceCache.lookup(Key))
return Acceptable;
auto *Acceptable = isVisible(getSema(), Key)
? Key
: findAcceptableDecl(getSema(), Key, IDNS);
if (Acceptable)
getSema().VisibleNamespaceCache.insert(std::make_pair(Key, Acceptable));
return Acceptable;
}
return findAcceptableDecl(getSema(), D, IDNS);
}
bool LookupResult::isVisible(Sema &SemaRef, NamedDecl *D) {
// If this declaration is already visible, return it directly.
if (D->isUnconditionallyVisible())
return true;
// During template instantiation, we can refer to hidden declarations, if
// they were visible in any module along the path of instantiation.
return isAcceptableSlow(SemaRef, D, Sema::AcceptableKind::Visible);
}
bool LookupResult::isReachable(Sema &SemaRef, NamedDecl *D) {
if (D->isUnconditionallyVisible())
return true;
return isAcceptableSlow(SemaRef, D, Sema::AcceptableKind::Reachable);
}
bool LookupResult::isAvailableForLookup(Sema &SemaRef, NamedDecl *ND) {
// Visibility should already have been checked at the call site.
if (isVisible(SemaRef, ND))
return true;
// A deduction guide generally lives at namespace scope, but it is just a
// hint to the compiler. What we actually look up is the generated member
// of the corresponding template, so it is sufficient to check the
// reachability of the template decl.
if (auto *DeductionGuide = ND->getDeclName().getCXXDeductionGuideTemplate())
return SemaRef.hasReachableDefinition(DeductionGuide);
// FIXME: The lookup for allocation functions is a standalone process.
// (The logic can be found in Sema::FindAllocationFunctions.)
//
// This structure becomes a problem when we instantiate a template
// declaration that uses a placement allocation function which is
// invisible.
// (See https://github.com/llvm/llvm-project/issues/59601)
//
// Here we work around it by making placement allocation functions
// always acceptable. The downside is that we can't diagnose direct
// uses of invisible placement allocation functions. (Such uses should
// be rare, though.)
if (auto *FD = dyn_cast<FunctionDecl>(ND);
FD && FD->isReservedGlobalPlacementOperator())
return true;
auto *DC = ND->getDeclContext();
// If ND is not visible and it is at namespace scope, it shouldn't be found
// by name lookup.
if (DC->isFileContext())
return false;
// [module.interface]p7
// Class and enumeration member names can be found by name lookup in any
// context in which a definition of the type is reachable.
//
// FIXME: The current implementation doesn't consider scope. For example,
// ```
// // m.cppm
// export module m;
// enum E1 { e1 };
// // Use.cpp
// import m;
// void test() {
// auto a = E1::e1; // Error as expected.
// auto b = e1; // Should be an error: namespace-scope name e1 is not visible
// }
// ```
// For the above example, the current implementation correctly emits an
// error for `a`. However, it doesn't diagnose `b` yet, since we only
// check the reachability of the parent.
// See clang/test/CXX/module/module.interface/p7.cpp for example.
if (auto *TD = dyn_cast<TagDecl>(DC))
return SemaRef.hasReachableDefinition(TD);
return false;
}
bool Sema::LookupName(LookupResult &R, Scope *S, bool AllowBuiltinCreation,
bool ForceNoCPlusPlus) {
DeclarationName Name = R.getLookupName();
if (!Name) return false;
LookupNameKind NameKind = R.getLookupKind();
if (!getLangOpts().CPlusPlus || ForceNoCPlusPlus) {
// Unqualified name lookup in C/Objective-C is purely lexical, so
// search in the declarations attached to the name.
if (NameKind == Sema::LookupRedeclarationWithLinkage) {
// Find the nearest non-transparent declaration scope.
while (!(S->getFlags() & Scope::DeclScope) ||
(S->getEntity() && S->getEntity()->isTransparentContext()))
S = S->getParent();
}
// When performing a scope lookup, we want to find local extern decls.
FindLocalExternScope FindLocals(R);
// Scan up the scope chain looking for a decl that matches this
// identifier that is in the appropriate namespace. This search
// should not take long, as shadowing of names is uncommon, and
// deep shadowing is extremely uncommon.
bool LeftStartingScope = false;
for (IdentifierResolver::iterator I = IdResolver.begin(Name),
IEnd = IdResolver.end();
I != IEnd; ++I)
if (NamedDecl *D = R.getAcceptableDecl(*I)) {
if (NameKind == LookupRedeclarationWithLinkage) {
// Determine whether this (or a previous) declaration is
// out-of-scope.
if (!LeftStartingScope && !S->isDeclScope(*I))
LeftStartingScope = true;
// If we found something outside of our starting scope that
// does not have linkage, skip it.
if (LeftStartingScope && !((*I)->hasLinkage())) {
R.setShadowed();
continue;
}
}
else if (NameKind == LookupObjCImplicitSelfParam &&
!isa<ImplicitParamDecl>(*I))
continue;
R.addDecl(D);
// Check whether there are any other declarations with the same name
// and in the same scope.
if (I != IEnd) {
// Find the scope in which this declaration was declared (if it
// actually exists in a Scope).
while (S && !S->isDeclScope(D))
S = S->getParent();
// If the scope containing the declaration is the translation unit,
// then we'll need to perform our checks based on the matching
// DeclContexts rather than matching scopes.
if (S && isNamespaceOrTranslationUnitScope(S))
S = nullptr;
// Compute the DeclContext, if we need it.
DeclContext *DC = nullptr;
if (!S)
DC = (*I)->getDeclContext()->getRedeclContext();
IdentifierResolver::iterator LastI = I;
for (++LastI; LastI != IEnd; ++LastI) {
if (S) {
// Match based on scope.
if (!S->isDeclScope(*LastI))
break;
} else {
// Match based on DeclContext.
DeclContext *LastDC
= (*LastI)->getDeclContext()->getRedeclContext();
if (!LastDC->Equals(DC))
break;
}
// If the declaration is in the right namespace and visible, add it.
if (NamedDecl *LastD = R.getAcceptableDecl(*LastI))
R.addDecl(LastD);
}
R.resolveKind();
}
return true;
}
} else {
// Perform C++ unqualified name lookup.
if (CppLookupName(R, S))
return true;
}
// If we didn't find a use of this identifier, and if the identifier
// corresponds to a compiler builtin, create the decl object for the builtin
// now, injecting it into translation unit scope, and return it.
if (AllowBuiltinCreation && LookupBuiltin(R))
return true;
// If we didn't find a use of this identifier, the ExternalSource
// may be able to handle the situation.
// Note: some lookup failures are expected!
// See e.g. R.isForRedeclaration().
return (ExternalSource && ExternalSource->LookupUnqualified(R, S));
}
/// Perform qualified name lookup in the namespaces nominated by
/// using-directives in the given context.
///
/// C++98 [namespace.qual]p2:
/// Given X::m (where X is a user-declared namespace), or given \::m
/// (where X is the global namespace), let S be the set of all
/// declarations of m in X and in the transitive closure of all
/// namespaces nominated by using-directives in X and its used
/// namespaces, except that using-directives are ignored in any
/// namespace, including X, directly containing one or more
/// declarations of m. No namespace is searched more than once in
/// the lookup of a name. If S is the empty set, the program is
/// ill-formed. Otherwise, if S has exactly one member, or if the
/// context of the reference is a using-declaration
/// (namespace.udecl), S is the required set of declarations of
/// m. Otherwise if the use of m is not one that allows a unique
/// declaration to be chosen from S, the program is ill-formed.
///
/// C++98 [namespace.qual]p5:
/// During the lookup of a qualified namespace member name, if the
/// lookup finds more than one declaration of the member, and if one
/// declaration introduces a class name or enumeration name and the
/// other declarations either introduce the same object, the same
/// enumerator or a set of functions, the non-type name hides the
/// class or enumeration name if and only if the declarations are
/// from the same namespace; otherwise (the declarations are from
/// different namespaces), the program is ill-formed.
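///
/// Illustrative sketch (not part of the original comment):
/// ```
/// namespace B { void m(); }
/// namespace A { using namespace B; }
/// void call() { A::m(); } // finds B::m via A's using-directive, since A
///                         // itself declares no m
/// ```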
static bool LookupQualifiedNameInUsingDirectives(Sema &S, LookupResult &R,
DeclContext *StartDC) {
assert(StartDC->isFileContext() && "start context is not a file context");
// We have not yet looked into these namespaces, much less added
// their "using-children" to the queue.
SmallVector<NamespaceDecl*, 8> Queue;
// We have at least added all these contexts to the queue.
llvm::SmallPtrSet<DeclContext*, 8> Visited;
Visited.insert(StartDC);
// We have already looked into the initial namespace; seed the queue
// with its using-children.
for (auto *I : StartDC->using_directives()) {
NamespaceDecl *ND = I->getNominatedNamespace()->getFirstDecl();
if (S.isVisible(I) && Visited.insert(ND).second)
Queue.push_back(ND);
}
// The easiest way to implement the restriction in [namespace.qual]p5
// is to check whether any of the individual results found a tag
// and, if so, to declare an ambiguity if the final result is not
// a tag.
bool FoundTag = false;
bool FoundNonTag = false;
LookupResult LocalR(LookupResult::Temporary, R);
bool Found = false;
while (!Queue.empty()) {
NamespaceDecl *ND = Queue.pop_back_val();
// We go through some convolutions here to avoid copying results
// between LookupResults.
bool UseLocal = !R.empty();
LookupResult &DirectR = UseLocal ? LocalR : R;
bool FoundDirect = LookupDirect(S, DirectR, ND);
if (FoundDirect) {
// First do any local hiding.
DirectR.resolveKind();
// If the local result is a tag, remember that.
if (DirectR.isSingleTagDecl())
FoundTag = true;
else
FoundNonTag = true;
// Append the local results to the total results if necessary.
if (UseLocal) {
R.addAllDecls(LocalR);
LocalR.clear();
}
}
// If we find names in this namespace, ignore its using directives.
if (FoundDirect) {
Found = true;
continue;
}
for (auto *I : ND->using_directives()) {
NamespaceDecl *Nom = I->getNominatedNamespace();
if (S.isVisible(I) && Visited.insert(Nom).second)
Queue.push_back(Nom);
}
}
if (Found) {
if (FoundTag && FoundNonTag)
R.setAmbiguousQualifiedTagHiding();
else
R.resolveKind();
}
return Found;
}
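// Illustrative sketch (not from the original source) of the
// [namespace.qual]p5 restriction implemented above:
// ```
// namespace A { struct s {}; }
// namespace B { void s(); }
// namespace X { using namespace A; using namespace B; }
// ```
// Naming X::s finds a tag from A and a non-tag from B; since they come from
// different namespaces, setAmbiguousQualifiedTagHiding reports an ambiguity.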
bool Sema::LookupQualifiedName(LookupResult &R, DeclContext *LookupCtx,
bool InUnqualifiedLookup) {
assert(LookupCtx && "Sema::LookupQualifiedName requires a lookup context");
if (!R.getLookupName())
return false;
// Make sure that the declaration context is complete.
assert((!isa<TagDecl>(LookupCtx) ||
LookupCtx->isDependentContext() ||
cast<TagDecl>(LookupCtx)->isCompleteDefinition() ||
cast<TagDecl>(LookupCtx)->isBeingDefined()) &&
"Declaration context must already be complete!");
struct QualifiedLookupInScope {
bool oldVal;
DeclContext *Context;
// Set a flag in the DeclContext informing the debugger that we're looking
// for a qualified name.
QualifiedLookupInScope(DeclContext *ctx)
: oldVal(ctx->shouldUseQualifiedLookup()), Context(ctx) {
ctx->setUseQualifiedLookup();
}
~QualifiedLookupInScope() {
Context->setUseQualifiedLookup(oldVal);
}
} QL(LookupCtx);
CXXRecordDecl *LookupRec = dyn_cast<CXXRecordDecl>(LookupCtx);
// FIXME: Per [temp.dep.general]p2, an unqualified name is also dependent
// if it's a dependent conversion-function-id or operator= where the current
// class is a templated entity. This should be handled in LookupName.
if (!InUnqualifiedLookup && !R.isForRedeclaration()) {
// C++23 [temp.dep.type]p5:
// A qualified name is dependent if
// - it is a conversion-function-id whose conversion-type-id
// is dependent, or
// - [...]
// - its lookup context is the current instantiation and it
// is operator=, or
// - [...]
if (DeclarationName Name = R.getLookupName();
Name.getNameKind() == DeclarationName::CXXConversionFunctionName &&
Name.getCXXNameType()->isDependentType()) {
R.setNotFoundInCurrentInstantiation();
return false;
}
}
if (LookupDirect(*this, R, LookupCtx)) {
R.resolveKind();
if (LookupRec)
R.setNamingClass(LookupRec);
return true;
}
// Don't descend into implied contexts for redeclarations.
// C++98 [namespace.qual]p6:
// In a declaration for a namespace member in which the
// declarator-id is a qualified-id, given that the qualified-id
// for the namespace member has the form
// nested-name-specifier unqualified-id
// the unqualified-id shall name a member of the namespace
// designated by the nested-name-specifier.
// See also [class.mfct]p5 and [class.static.data]p2.
if (R.isForRedeclaration())
return false;
// If this is a namespace, look it up in the implied namespaces.
if (LookupCtx->isFileContext())
return LookupQualifiedNameInUsingDirectives(*this, R, LookupCtx);
// If this isn't a C++ class, we aren't allowed to look into base
// classes, we're done.
if (!LookupRec || !LookupRec->getDefinition())
return false;
// We're done for lookups that can never succeed for C++ classes.
if (R.getLookupKind() == LookupOperatorName ||
R.getLookupKind() == LookupNamespaceName ||
R.getLookupKind() == LookupObjCProtocolName ||
R.getLookupKind() == LookupLabel)
return false;
// If we're performing qualified name lookup into a dependent class,
// then we are actually looking into a current instantiation. If we have any
// dependent base classes, then we either have to delay lookup until
// template instantiation time (at which point all bases will be available)
// or we have to fail.
if (!InUnqualifiedLookup && LookupRec->isDependentContext() &&
LookupRec->hasAnyDependentBases()) {
R.setNotFoundInCurrentInstantiation();
return false;
}
// Perform lookup into our base classes.
DeclarationName Name = R.getLookupName();
unsigned IDNS = R.getIdentifierNamespace();
// Look for this member in our base classes.
auto BaseCallback = [Name, IDNS](const CXXBaseSpecifier *Specifier,
CXXBasePath &Path) -> bool {
CXXRecordDecl *BaseRecord = Specifier->getType()->getAsCXXRecordDecl();
// Drop leading non-matching lookup results from the declaration list so
// we don't need to consider them again below.
for (Path.Decls = BaseRecord->lookup(Name).begin();
Path.Decls != Path.Decls.end(); ++Path.Decls) {
if ((*Path.Decls)->isInIdentifierNamespace(IDNS))
return true;
}
return false;
};
CXXBasePaths Paths;
Paths.setOrigin(LookupRec);
if (!LookupRec->lookupInBases(BaseCallback, Paths))
return false;
R.setNamingClass(LookupRec);
// C++ [class.member.lookup]p2:
// [...] If the resulting set of declarations are not all from
// sub-objects of the same type, or the set has a nonstatic member
// and includes members from distinct sub-objects, there is an
// ambiguity and the program is ill-formed. Otherwise that set is
// the result of the lookup.
QualType SubobjectType;
int SubobjectNumber = 0;
AccessSpecifier SubobjectAccess = AS_none;
// Check whether the given lookup result contains only static members.
auto HasOnlyStaticMembers = [&](DeclContext::lookup_iterator Result) {
for (DeclContext::lookup_iterator I = Result, E = I.end(); I != E; ++I)
if ((*I)->isInIdentifierNamespace(IDNS) && (*I)->isCXXInstanceMember())
return false;
return true;
};
bool TemplateNameLookup = R.isTemplateNameLookup();
// Determine whether two sets of members contain the same members, as
// required by C++ [class.member.lookup]p6.
auto HasSameDeclarations = [&](DeclContext::lookup_iterator A,
DeclContext::lookup_iterator B) {
using Iterator = DeclContextLookupResult::iterator;
using Result = const void *;
auto Next = [&](Iterator &It, Iterator End) -> Result {
while (It != End) {
NamedDecl *ND = *It++;
if (!ND->isInIdentifierNamespace(IDNS))
continue;
// C++ [temp.local]p3:
// A lookup that finds an injected-class-name (10.2) can result in
// an ambiguity in certain cases (for example, if it is found in
// more than one base class). If all of the injected-class-names
// that are found refer to specializations of the same class
// template, and if the name is used as a template-name, the
// reference refers to the class template itself and not a
// specialization thereof, and is not ambiguous.
if (TemplateNameLookup)
if (auto *TD = getAsTemplateNameDecl(ND))
ND = TD;
// C++ [class.member.lookup]p3:
// type declarations (including injected-class-names) are replaced by
// the types they designate
if (const TypeDecl *TD = dyn_cast<TypeDecl>(ND->getUnderlyingDecl())) {
QualType T = Context.getTypeDeclType(TD);
return T.getCanonicalType().getAsOpaquePtr();
}
return ND->getUnderlyingDecl()->getCanonicalDecl();
}
return nullptr;
};
// We'll often find the declarations are in the same order. Handle this
// case (and the special case of only one declaration) efficiently.
Iterator AIt = A, BIt = B, AEnd, BEnd;
while (true) {
Result AResult = Next(AIt, AEnd);
Result BResult = Next(BIt, BEnd);
if (!AResult && !BResult)
return true;
if (!AResult || !BResult)
return false;
if (AResult != BResult) {
// Found a mismatch; carefully check both lists, accounting for the
// possibility of declarations appearing more than once.
llvm::SmallDenseMap<Result, bool, 32> AResults;
for (; AResult; AResult = Next(AIt, AEnd))
AResults.insert({AResult, /*FoundInB*/false});
unsigned Found = 0;
for (; BResult; BResult = Next(BIt, BEnd)) {
auto It = AResults.find(BResult);
if (It == AResults.end())
return false;
if (!It->second) {
It->second = true;
++Found;
}
}
return AResults.size() == Found;
}
}
};
for (CXXBasePaths::paths_iterator Path = Paths.begin(), PathEnd = Paths.end();
Path != PathEnd; ++Path) {
const CXXBasePathElement &PathElement = Path->back();
// Pick the best (i.e. most permissive i.e. numerically lowest) access
// across all paths.
SubobjectAccess = std::min(SubobjectAccess, Path->Access);
// Determine whether we're looking at a distinct sub-object or not.
if (SubobjectType.isNull()) {
// This is the first subobject we've looked at. Record its type.
SubobjectType = Context.getCanonicalType(PathElement.Base->getType());
SubobjectNumber = PathElement.SubobjectNumber;
continue;
}
if (SubobjectType !=
Context.getCanonicalType(PathElement.Base->getType())) {
// We found members of the given name in two subobjects of
// different types. If the declaration sets aren't the same, this
// lookup is ambiguous.
//
// FIXME: The language rule says that this applies irrespective of
// whether the sets contain only static members.
if (HasOnlyStaticMembers(Path->Decls) &&
HasSameDeclarations(Paths.begin()->Decls, Path->Decls))
continue;
R.setAmbiguousBaseSubobjectTypes(Paths);
return true;
}
// FIXME: This language rule no longer exists. Checking for ambiguous base
// subobjects should be done as part of formation of a class member access
// expression (when converting the object parameter to the member's type).
if (SubobjectNumber != PathElement.SubobjectNumber) {
// We have a different subobject of the same type.
// C++ [class.member.lookup]p5:
// A static member, a nested type or an enumerator defined in
// a base class T can unambiguously be found even if an object
// has more than one base class subobject of type T.
if (HasOnlyStaticMembers(Path->Decls))
continue;
// We have found a nonstatic member name in multiple, distinct
// subobjects. Name lookup is ambiguous.
R.setAmbiguousBaseSubobjects(Paths);
return true;
}
}
// Lookup in a base class succeeded; return these results.
for (DeclContext::lookup_iterator I = Paths.front().Decls, E = I.end();
I != E; ++I) {
AccessSpecifier AS = CXXRecordDecl::MergeAccess(SubobjectAccess,
(*I)->getAccess());
if (NamedDecl *ND = R.getAcceptableDecl(*I))
R.addDecl(ND, AS);
}
R.resolveKind();
return true;
}
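// Illustrative sketch (not from the original source) of the subobject
// ambiguity handled above:
// ```
// struct A { int x; };
// struct B : A {}; struct C : A {};
// struct D : B, C {};
// int f(D d) { return d.x; } // ambiguous: x lives in two distinct A
//                            // subobjects (setAmbiguousBaseSubobjects)
// ```
// With virtual inheritance (struct B : virtual A, ...) there is a single A
// subobject and the lookup succeeds.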
bool Sema::LookupQualifiedName(LookupResult &R, DeclContext *LookupCtx,
CXXScopeSpec &SS) {
auto *NNS = SS.getScopeRep();
if (NNS && NNS->getKind() == NestedNameSpecifier::Super)
return LookupInSuper(R, NNS->getAsRecordDecl());
else
return LookupQualifiedName(R, LookupCtx);
}
bool Sema::LookupParsedName(LookupResult &R, Scope *S, CXXScopeSpec *SS,
QualType ObjectType, bool AllowBuiltinCreation,
bool EnteringContext) {
// When the scope specifier is invalid, don't even look for anything.
if (SS && SS->isInvalid())
return false;
// Determine where to perform name lookup
DeclContext *DC = nullptr;
bool IsDependent = false;
if (!ObjectType.isNull()) {
// This nested-name-specifier occurs in a member access expression, e.g.,
// x->B::f, and we are looking into the type of the object.
assert((!SS || SS->isEmpty()) &&
"ObjectType and scope specifier cannot coexist");
DC = computeDeclContext(ObjectType);
IsDependent = !DC && ObjectType->isDependentType();
assert(((!DC && ObjectType->isDependentType()) ||
!ObjectType->isIncompleteType() || !ObjectType->getAs<TagType>() ||
ObjectType->castAs<TagType>()->isBeingDefined()) &&
"Caller should have completed object type");
} else if (SS && SS->isNotEmpty()) {
// This nested-name-specifier occurs after another nested-name-specifier,
// so look into the context associated with the prior nested-name-specifier.
if ((DC = computeDeclContext(*SS, EnteringContext))) {
// The declaration context must be complete.
if (!DC->isDependentContext() && RequireCompleteDeclContext(*SS, DC))
return false;
R.setContextRange(SS->getRange());
// FIXME: '__super' lookup semantics could be implemented by a
// LookupResult::isSuperLookup flag which skips the initial search of
// the lookup context in LookupQualified.
if (NestedNameSpecifier *NNS = SS->getScopeRep();
NNS->getKind() == NestedNameSpecifier::Super)
return LookupInSuper(R, NNS->getAsRecordDecl());
}
IsDependent = !DC && isDependentScopeSpecifier(*SS);
} else {
// Perform unqualified name lookup starting in the given scope.
return LookupName(R, S, AllowBuiltinCreation);
}
// If we were able to compute a declaration context, perform qualified name
// lookup in that context.
if (DC)
return LookupQualifiedName(R, DC);
else if (IsDependent)
// We could not resolve the scope specifier to a specific declaration
// context, which means that SS refers to an unknown specialization.
// Name lookup can't find anything in this case.
R.setNotFoundInCurrentInstantiation();
return false;
}
bool Sema::LookupInSuper(LookupResult &R, CXXRecordDecl *Class) {
// The access-control rules we use here are essentially the rules for
// doing a lookup in Class that just magically skipped the direct
// members of Class itself. That is, the naming class is Class, and the
// access includes the access of the base.
for (const auto &BaseSpec : Class->bases()) {
CXXRecordDecl *RD = cast<CXXRecordDecl>(
BaseSpec.getType()->castAs<RecordType>()->getDecl());
LookupResult Result(*this, R.getLookupNameInfo(), R.getLookupKind());
Result.setBaseObjectType(Context.getRecordType(Class));
LookupQualifiedName(Result, RD);
// Copy the lookup results into the target, merging the base's access into
// the path access.
for (auto I = Result.begin(), E = Result.end(); I != E; ++I) {
R.addDecl(I.getDecl(),
CXXRecordDecl::MergeAccess(BaseSpec.getAccessSpecifier(),
I.getAccess()));
}
Result.suppressDiagnostics();
}
R.resolveKind();
R.setNamingClass(Class);
return !R.empty();
}
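// Illustrative sketch (MS extension, not from the original source):
// ```
// struct Base { void g(); };
// struct Derived : Base {
//   void g();
//   void f() { __super::g(); } // LookupInSuper skips Derived's own g and
// };                           // searches only the direct bases
// ```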
void Sema::DiagnoseAmbiguousLookup(LookupResult &Result) {
assert(Result.isAmbiguous() && "Lookup result must be ambiguous");
DeclarationName Name = Result.getLookupName();
SourceLocation NameLoc = Result.getNameLoc();
SourceRange LookupRange = Result.getContextRange();
switch (Result.getAmbiguityKind()) {
case LookupResult::AmbiguousBaseSubobjects: {
CXXBasePaths *Paths = Result.getBasePaths();
QualType SubobjectType = Paths->front().back().Base->getType();
Diag(NameLoc, diag::err_ambiguous_member_multiple_subobjects)
<< Name << SubobjectType << getAmbiguousPathsDisplayString(*Paths)
<< LookupRange;
DeclContext::lookup_iterator Found = Paths->front().Decls;
while (isa<CXXMethodDecl>(*Found) &&
cast<CXXMethodDecl>(*Found)->isStatic())
++Found;
Diag((*Found)->getLocation(), diag::note_ambiguous_member_found);
break;
}
case LookupResult::AmbiguousBaseSubobjectTypes: {
Diag(NameLoc, diag::err_ambiguous_member_multiple_subobject_types)
<< Name << LookupRange;
CXXBasePaths *Paths = Result.getBasePaths();
std::set<const NamedDecl *> DeclsPrinted;
for (CXXBasePaths::paths_iterator Path = Paths->begin(),
PathEnd = Paths->end();
Path != PathEnd; ++Path) {
const NamedDecl *D = *Path->Decls;
if (!D->isInIdentifierNamespace(Result.getIdentifierNamespace()))
continue;
if (DeclsPrinted.insert(D).second) {
if (const auto *TD = dyn_cast<TypedefNameDecl>(D->getUnderlyingDecl()))
Diag(D->getLocation(), diag::note_ambiguous_member_type_found)
<< TD->getUnderlyingType();
else if (const auto *TD = dyn_cast<TypeDecl>(D->getUnderlyingDecl()))
Diag(D->getLocation(), diag::note_ambiguous_member_type_found)
<< Context.getTypeDeclType(TD);
else
Diag(D->getLocation(), diag::note_ambiguous_member_found);
}
}
break;
}
case LookupResult::AmbiguousTagHiding: {
Diag(NameLoc, diag::err_ambiguous_tag_hiding) << Name << LookupRange;
llvm::SmallPtrSet<NamedDecl*, 8> TagDecls;
for (auto *D : Result)
if (TagDecl *TD = dyn_cast<TagDecl>(D)) {
TagDecls.insert(TD);
Diag(TD->getLocation(), diag::note_hidden_tag);
}
for (auto *D : Result)
if (!isa<TagDecl>(D))
Diag(D->getLocation(), diag::note_hiding_object);
// For recovery purposes, go ahead and implement the hiding.
LookupResult::Filter F = Result.makeFilter();
while (F.hasNext()) {
if (TagDecls.count(F.next()))
F.erase();
}
F.done();
break;
}
case LookupResult::AmbiguousReferenceToPlaceholderVariable: {
Diag(NameLoc, diag::err_using_placeholder_variable) << Name << LookupRange;
DeclContext *DC = nullptr;
for (auto *D : Result) {
Diag(D->getLocation(), diag::note_reference_placeholder) << D;
if (DC != nullptr && DC != D->getDeclContext())
break;
DC = D->getDeclContext();
}
break;
}
case LookupResult::AmbiguousReference: {
Diag(NameLoc, diag::err_ambiguous_reference) << Name << LookupRange;
for (auto *D : Result)
Diag(D->getLocation(), diag::note_ambiguous_candidate) << D;
break;
}
}
}
namespace {
struct AssociatedLookup {
AssociatedLookup(Sema &S, SourceLocation InstantiationLoc,
Sema::AssociatedNamespaceSet &Namespaces,
Sema::AssociatedClassSet &Classes)
: S(S), Namespaces(Namespaces), Classes(Classes),
InstantiationLoc(InstantiationLoc) {
}
bool addClassTransitive(CXXRecordDecl *RD) {
Classes.insert(RD);
return ClassesTransitive.insert(RD);
}
Sema &S;
Sema::AssociatedNamespaceSet &Namespaces;
Sema::AssociatedClassSet &Classes;
SourceLocation InstantiationLoc;
private:
Sema::AssociatedClassSet ClassesTransitive;
};
} // end anonymous namespace
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType T);
// Given the declaration context \param Ctx of a class, class template or
// enumeration, add the associated namespaces to \param Namespaces as described
// in [basic.lookup.argdep]p2.
static void CollectEnclosingNamespace(Sema::AssociatedNamespaceSet &Namespaces,
DeclContext *Ctx) {
// The exact wording has been changed in C++14 as a result of
// CWG 1691 (see also CWG 1690 and CWG 1692). We apply it unconditionally
// to all language versions since it is possible to return a local type
// from a lambda in C++11.
//
// C++14 [basic.lookup.argdep]p2:
// If T is a class type [...]. Its associated namespaces are the innermost
// enclosing namespaces of its associated classes. [...]
//
// If T is an enumeration type, its associated namespace is the innermost
// enclosing namespace of its declaration. [...]
// We additionally skip inline namespaces. The innermost non-inline namespace
// contains all names of all its nested inline namespaces anyway, so we can
// replace the entire inline namespace tree with its root.
while (!Ctx->isFileContext() || Ctx->isInlineNamespace())
Ctx = Ctx->getParent();
Namespaces.insert(Ctx->getPrimaryContext());
}
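// Illustrative sketch (not from the original source):
// ```
// namespace N { inline namespace v1 { struct S {}; } }
// ```
// For N::v1::S the namespace recorded above is N, not N::v1, since the
// innermost non-inline namespace already contains every name of its nested
// inline namespaces.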
// Add the associated classes and namespaces for argument-dependent
// lookup that involves a template argument (C++ [basic.lookup.argdep]p2).
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result,
const TemplateArgument &Arg) {
// C++ [basic.lookup.argdep]p2, last bullet:
// -- [...] ;
switch (Arg.getKind()) {
case TemplateArgument::Null:
break;
case TemplateArgument::Type:
// [...] the namespaces and classes associated with the types of the
// template arguments provided for template type parameters (excluding
// template template parameters)
addAssociatedClassesAndNamespaces(Result, Arg.getAsType());
break;
case TemplateArgument::Template:
case TemplateArgument::TemplateExpansion: {
// [...] the namespaces in which any template template arguments are
// defined; and the classes in which any member templates used as
// template template arguments are defined.
TemplateName Template = Arg.getAsTemplateOrTemplatePattern();
if (ClassTemplateDecl *ClassTemplate
= dyn_cast<ClassTemplateDecl>(Template.getAsTemplateDecl())) {
DeclContext *Ctx = ClassTemplate->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this class.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
}
break;
}
case TemplateArgument::Declaration:
case TemplateArgument::Integral:
case TemplateArgument::Expression:
case TemplateArgument::NullPtr:
case TemplateArgument::StructuralValue:
// [Note: non-type template arguments do not contribute to the set of
// associated namespaces. ]
break;
case TemplateArgument::Pack:
for (const auto &P : Arg.pack_elements())
addAssociatedClassesAndNamespaces(Result, P);
break;
}
}
// Add the associated classes and namespaces for argument-dependent lookup
// with an argument of class type (C++ [basic.lookup.argdep]p2).
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result,
CXXRecordDecl *Class) {
// Just silently ignore anything whose name is __va_list_tag.
if (Class->getDeclName() == Result.S.VAListTagName)
return;
// C++ [basic.lookup.argdep]p2:
// [...]
// -- If T is a class type (including unions), its associated
// classes are: the class itself; the class of which it is a
// member, if any; and its direct and indirect base classes.
// Its associated namespaces are the innermost enclosing
// namespaces of its associated classes.
// Add the class of which it is a member, if any.
DeclContext *Ctx = Class->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this class.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
// -- If T is a template-id, its associated namespaces and classes are
// the namespace in which the template is defined; for member
// templates, the member template's class; the namespaces and classes
// associated with the types of the template arguments provided for
// template type parameters (excluding template template parameters); the
// namespaces in which any template template arguments are defined; and
// the classes in which any member templates used as template template
// arguments are defined. [Note: non-type template arguments do not
// contribute to the set of associated namespaces. ]
if (ClassTemplateSpecializationDecl *Spec
= dyn_cast<ClassTemplateSpecializationDecl>(Class)) {
DeclContext *Ctx = Spec->getSpecializedTemplate()->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this class.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
for (unsigned I = 0, N = TemplateArgs.size(); I != N; ++I)
addAssociatedClassesAndNamespaces(Result, TemplateArgs[I]);
}
// Add the class itself. If we've already transitively visited this class,
// we don't need to visit base classes.
if (!Result.addClassTransitive(Class))
return;
// Only recurse into base classes for complete types.
if (!Result.S.isCompleteType(Result.InstantiationLoc,
Result.S.Context.getRecordType(Class)))
return;
// Add direct and indirect base classes along with their associated
// namespaces.
SmallVector<CXXRecordDecl *, 32> Bases;
Bases.push_back(Class);
while (!Bases.empty()) {
// Pop this class off the stack.
Class = Bases.pop_back_val();
// Visit the base classes.
for (const auto &Base : Class->bases()) {
const RecordType *BaseType = Base.getType()->getAs<RecordType>();
// In dependent contexts, we do ADL twice, and the first time around,
// the base type might be a dependent TemplateSpecializationType, or a
// TemplateTypeParmType. If that happens, simply ignore it.
// FIXME: If we want to support export, we probably need to add the
// namespace of the template in a TemplateSpecializationType, or even
// the classes and namespaces of known non-dependent arguments.
if (!BaseType)
continue;
CXXRecordDecl *BaseDecl = cast<CXXRecordDecl>(BaseType->getDecl());
if (Result.addClassTransitive(BaseDecl)) {
// Find the associated namespace for this base class.
DeclContext *BaseCtx = BaseDecl->getDeclContext();
CollectEnclosingNamespace(Result.Namespaces, BaseCtx);
// Make sure we visit the bases of this base class.
if (BaseDecl->bases_begin() != BaseDecl->bases_end())
Bases.push_back(BaseDecl);
}
}
}
}
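// Illustrative sketch (not from the original source) of ADL through bases:
// ```
// namespace N { struct B {}; void f(B); }
// struct D : N::B {};
// void test() { f(D{}); } // OK: N::B is an associated class of D, so N is
//                         // an associated namespace and N::f is found
// ```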
// Add the associated classes and namespaces for
// argument-dependent lookup with an argument of type T
// (C++ [basic.lookup.koenig]p2).
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) {
// C++ [basic.lookup.koenig]p2:
//
// For each argument type T in the function call, there is a set
// of zero or more associated namespaces and a set of zero or more
// associated classes to be considered. The sets of namespaces and
// classes is determined entirely by the types of the function
// arguments (and the namespace of any template template
// argument). Typedef names and using-declarations used to specify
// the types do not contribute to this set. The sets of namespaces
// and classes are determined in the following way:
SmallVector<const Type *, 16> Queue;
const Type *T = Ty->getCanonicalTypeInternal().getTypePtr();
while (true) {
switch (T->getTypeClass()) {
#define TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define ABSTRACT_TYPE(Class, Base)
#include "clang/AST/TypeNodes.inc"
// T is canonical. We can also ignore dependent types because
// we don't need to do ADL at the definition point, but if we
// wanted to implement template export (or if we find some other
// use for associated classes and namespaces...) this would be
// wrong.
break;
// -- If T is a pointer to U or an array of U, its associated
// namespaces and classes are those associated with U.
case Type::Pointer:
T = cast<PointerType>(T)->getPointeeType().getTypePtr();
continue;
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray:
T = cast<ArrayType>(T)->getElementType().getTypePtr();
continue;
// -- If T is a fundamental type, its associated sets of
// namespaces and classes are both empty.
case Type::Builtin:
break;
// -- If T is a class type (including unions), its associated
// classes are: the class itself; the class of which it is
// a member, if any; and its direct and indirect base classes.
// Its associated namespaces are the innermost enclosing
// namespaces of its associated classes.
case Type::Record: {
CXXRecordDecl *Class =
cast<CXXRecordDecl>(cast<RecordType>(T)->getDecl());
addAssociatedClassesAndNamespaces(Result, Class);
break;
}
// -- If T is an enumeration type, its associated namespace
// is the innermost enclosing namespace of its declaration.
// If it is a class member, its associated class is the
// member’s class; else it has no associated class.
case Type::Enum: {
EnumDecl *Enum = cast<EnumType>(T)->getDecl();
DeclContext *Ctx = Enum->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this enumeration.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
break;
}
// -- If T is a function type, its associated namespaces and
// classes are those associated with the function parameter
// types and those associated with the return type.
case Type::FunctionProto: {
const FunctionProtoType *Proto = cast<FunctionProtoType>(T);
for (const auto &Arg : Proto->param_types())
Queue.push_back(Arg.getTypePtr());
[[fallthrough]];
}
case Type::FunctionNoProto: {
const FunctionType *FnType = cast<FunctionType>(T);
T = FnType->getReturnType().getTypePtr();
continue;
}
// -- If T is a pointer to a member function of a class X, its
// associated namespaces and classes are those associated
// with the function parameter types and return type,
// together with those associated with X.
//
// -- If T is a pointer to a data member of class X, its
// associated namespaces and classes are those associated
// with the member type together with those associated with
// X.
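// E.g. an argument of type int X::* associates class X (and X's
// innermost enclosing namespace) in addition to the associations of int,
// which are empty because int is fundamental.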
case Type::MemberPointer: {
const MemberPointerType *MemberPtr = cast<MemberPointerType>(T);
// Queue up the class type into which this points.
Queue.push_back(MemberPtr->getClass());
// And directly continue with the pointee type.
T = MemberPtr->getPointeeType().getTypePtr();
continue;
}
// As an extension, treat this like a normal pointer.
case Type::BlockPointer:
T = cast<BlockPointerType>(T)->getPointeeType().getTypePtr();
continue;
// References aren't covered by the standard, but that's such an
// obvious defect that we cover them anyway.
case Type::LValueReference:
case Type::RValueReference:
T = cast<ReferenceType>(T)->getPointeeType().getTypePtr();
continue;
// These are fundamental types.
case Type::Vector:
case Type::ExtVector:
case Type::ConstantMatrix:
case Type::Complex:
case Type::BitInt:
break;
// Non-deduced auto types only get here for error cases.
case Type::Auto:
case Type::DeducedTemplateSpecialization:
break;
// If T is an Objective-C object or interface type, or a pointer to an
// object or interface type, the associated namespace is the global
// namespace.
case Type::ObjCObject:
case Type::ObjCInterface:
case Type::ObjCObjectPointer:
Result.Namespaces.insert(Result.S.Context.getTranslationUnitDecl());
break;
// Atomic types are just wrappers; use the associations of the
// contained type.
case Type::Atomic:
T = cast<AtomicType>(T)->getValueType().getTypePtr();
continue;
case Type::Pipe:
T = cast<PipeType>(T)->getElementType().getTypePtr();
continue;
// Array parameter types are treated as fundamental types.
case Type::ArrayParameter:
break;
}
if (Queue.empty())
break;
T = Queue.pop_back_val();
}
}
void Sema::FindAssociatedClassesAndNamespaces(
SourceLocation InstantiationLoc, ArrayRef<Expr *> Args,
AssociatedNamespaceSet &AssociatedNamespaces,
AssociatedClassSet &AssociatedClasses) {
AssociatedNamespaces.clear();
AssociatedClasses.clear();
AssociatedLookup Result(*this, InstantiationLoc,
AssociatedNamespaces, AssociatedClasses);
// C++ [basic.lookup.koenig]p2:
// For each argument type T in the function call, there is a set
// of zero or more associated namespaces and a set of zero or more
// associated classes to be considered. The sets of namespaces and
// classes is determined entirely by the types of the function
// arguments (and the namespace of any template template
// argument).
for (unsigned ArgIdx = 0; ArgIdx != Args.size(); ++ArgIdx) {
Expr *Arg = Args[ArgIdx];
if (Arg->getType() != Context.OverloadTy) {
addAssociatedClassesAndNamespaces(Result, Arg->getType());
continue;
}
// [...] In addition, if the argument is the name or address of a
// set of overloaded functions and/or function templates, its
// associated classes and namespaces are the union of those
// associated with each of the members of the set: the namespace
// in which the function or function template is defined and the
// classes and namespaces associated with its (non-dependent)
// parameter types and return type.
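// E.g. (illustrative) if the argument is &f and f names an overload set,
// the associations of every overload's function type are added, as the
// loop below does via addAssociatedClassesAndNamespaces(FDecl->getType()).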
OverloadExpr *OE = OverloadExpr::find(Arg).Expression;
for (const NamedDecl *D : OE->decls()) {
// Look through any using declarations to find the underlying function.
const FunctionDecl *FDecl = D->getUnderlyingDecl()->getAsFunction();
// Add the classes and namespaces associated with the parameter
// types and return type of this function.
addAssociatedClassesAndNamespaces(Result, FDecl->getType());
}
}
}
NamedDecl *Sema::LookupSingleName(Scope *S, DeclarationName Name,
SourceLocation Loc,
LookupNameKind NameKind,
RedeclarationKind Redecl) {
LookupResult R(*this, Name, Loc, NameKind, Redecl);
LookupName(R, S);
return R.getAsSingle<NamedDecl>();
}
void Sema::LookupOverloadedOperatorName(OverloadedOperatorKind Op, Scope *S,
UnresolvedSetImpl &Functions) {
// C++ [over.match.oper]p3:
// -- The set of non-member candidates is the result of the
// unqualified lookup of operator@ in the context of the
// expression according to the usual rules for name lookup in
// unqualified function calls (3.4.2) except that all member
// functions are ignored.
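// E.g. for a + b this collects every non-member operator+ found by
// unqualified lookup at the point of the expression; member candidates
// are added separately during overload resolution.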
DeclarationName OpName = Context.DeclarationNames.getCXXOperatorName(Op);
LookupResult Operators(*this, OpName, SourceLocation(), LookupOperatorName);
LookupName(Operators, S);
assert(!Operators.isAmbiguous() && "Operator lookup cannot be ambiguous");
Functions.append(Operators.begin(), Operators.end());
}
Sema::SpecialMemberOverloadResult
Sema::LookupSpecialMember(CXXRecordDecl *RD, CXXSpecialMemberKind SM,
bool ConstArg, bool VolatileArg, bool RValueThis,
bool ConstThis, bool VolatileThis) {
assert(CanDeclareSpecialMemberFunction(RD) &&
"doing special member lookup into record that isn't fully complete");
RD = RD->getDefinition();
if (RValueThis || ConstThis || VolatileThis)
assert((SM == CXXSpecialMemberKind::CopyAssignment ||
SM == CXXSpecialMemberKind::MoveAssignment) &&
"constructors and destructors always have unqualified lvalue this");
if (ConstArg || VolatileArg)
assert((SM != CXXSpecialMemberKind::DefaultConstructor &&
SM != CXXSpecialMemberKind::Destructor) &&
"parameter-less special members can't have qualified arguments");
// FIXME: Get the caller to pass in a location for the lookup.
SourceLocation LookupLoc = RD->getLocation();
llvm::FoldingSetNodeID ID;
ID.AddPointer(RD);
ID.AddInteger(llvm::to_underlying(SM));
ID.AddInteger(ConstArg);
ID.AddInteger(VolatileArg);
ID.AddInteger(RValueThis);
ID.AddInteger(ConstThis);
ID.AddInteger(VolatileThis);
void *InsertPoint;
SpecialMemberOverloadResultEntry *Result =
SpecialMemberCache.FindNodeOrInsertPos(ID, InsertPoint);
// This was already cached
if (Result)
return *Result;
Result = BumpAlloc.Allocate<SpecialMemberOverloadResultEntry>();
Result = new (Result) SpecialMemberOverloadResultEntry(ID);
SpecialMemberCache.InsertNode(Result, InsertPoint);
if (SM == CXXSpecialMemberKind::Destructor) {
if (RD->needsImplicitDestructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitDestructor(RD);
});
}
CXXDestructorDecl *DD = RD->getDestructor();
Result->setMethod(DD);
Result->setKind(DD && !DD->isDeleted()
? SpecialMemberOverloadResult::Success
: SpecialMemberOverloadResult::NoMemberOrDeleted);
return *Result;
}
// Prepare for overload resolution. Here we construct a synthetic argument
// if necessary and make sure that implicit functions are declared.
CanQualType CanTy = Context.getCanonicalType(Context.getTagDeclType(RD));
DeclarationName Name;
Expr *Arg = nullptr;
unsigned NumArgs;
QualType ArgType = CanTy;
ExprValueKind VK = VK_LValue;
if (SM == CXXSpecialMemberKind::DefaultConstructor) {
Name = Context.DeclarationNames.getCXXConstructorName(CanTy);
NumArgs = 0;
if (RD->needsImplicitDefaultConstructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitDefaultConstructor(RD);
});
}
} else {
if (SM == CXXSpecialMemberKind::CopyConstructor ||
SM == CXXSpecialMemberKind::MoveConstructor) {
Name = Context.DeclarationNames.getCXXConstructorName(CanTy);
if (RD->needsImplicitCopyConstructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitCopyConstructor(RD);
});
}
if (getLangOpts().CPlusPlus11 && RD->needsImplicitMoveConstructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitMoveConstructor(RD);
});
}
} else {
Name = Context.DeclarationNames.getCXXOperatorName(OO_Equal);
if (RD->needsImplicitCopyAssignment()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitCopyAssignment(RD);
});
}
if (getLangOpts().CPlusPlus11 && RD->needsImplicitMoveAssignment()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitMoveAssignment(RD);
});
}
}
if (ConstArg)
ArgType.addConst();
if (VolatileArg)
ArgType.addVolatile();
// This isn't /really/ specified by the standard, but it's implied
// we should be working from a PRValue in the case of move to ensure
// that we prefer to bind to rvalue references, and an LValue in the
// case of copy to ensure we don't bind to rvalue references.
// Possibly an XValue is actually correct in the case of move, but
// there is no semantic difference for class types in this restricted
// case.
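// E.g. (illustrative) with class C, classifying the synthetic argument as
// a prvalue makes C(C&&) beat C(const C&) during move lookup, while the
// lvalue used for copy lookup cannot bind to C&& at all.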
if (SM == CXXSpecialMemberKind::CopyConstructor ||
SM == CXXSpecialMemberKind::CopyAssignment)
VK = VK_LValue;
else
VK = VK_PRValue;
}
OpaqueValueExpr FakeArg(LookupLoc, ArgType, VK);
if (SM != CXXSpecialMemberKind::DefaultConstructor) {
NumArgs = 1;
Arg = &FakeArg;
}
// Create the object argument
QualType ThisTy = CanTy;
if (ConstThis)
ThisTy.addConst();
if (VolatileThis)
ThisTy.addVolatile();
Expr::Classification Classification =
OpaqueValueExpr(LookupLoc, ThisTy, RValueThis ? VK_PRValue : VK_LValue)
.Classify(Context);
// Now we perform lookup on the name we computed earlier and do overload
// resolution. Lookup is only performed directly into the class since there
// will always be a (possibly implicit) declaration to shadow any others.
OverloadCandidateSet OCS(LookupLoc, OverloadCandidateSet::CSK_Normal);
DeclContext::lookup_result R = RD->lookup(Name);
if (R.empty()) {
// We might have no default constructor because we have a lambda's closure
// type, rather than because there's some other declared constructor.
// Every class has a copy/move constructor, copy/move assignment, and
// destructor.
assert(SM == CXXSpecialMemberKind::DefaultConstructor &&
"lookup for a constructor or assignment operator was empty");
Result->setMethod(nullptr);
Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted);
return *Result;
}
// Copy the candidates as our processing of them may load new declarations
// from an external source and invalidate lookup_result.
SmallVector<NamedDecl *, 8> Candidates(R.begin(), R.end());
for (NamedDecl *CandDecl : Candidates) {
if (CandDecl->isInvalidDecl())
continue;
DeclAccessPair Cand = DeclAccessPair::make(CandDecl, AS_public);
auto CtorInfo = getConstructorInfo(Cand);
if (CXXMethodDecl *M = dyn_cast<CXXMethodDecl>(Cand->getUnderlyingDecl())) {
if (SM == CXXSpecialMemberKind::CopyAssignment ||
SM == CXXSpecialMemberKind::MoveAssignment)
AddMethodCandidate(M, Cand, RD, ThisTy, Classification,
llvm::ArrayRef(&Arg, NumArgs), OCS, true);
else if (CtorInfo)
AddOverloadCandidate(CtorInfo.Constructor, CtorInfo.FoundDecl,
llvm::ArrayRef(&Arg, NumArgs), OCS,
/*SuppressUserConversions*/ true);
else
AddOverloadCandidate(M, Cand, llvm::ArrayRef(&Arg, NumArgs), OCS,
/*SuppressUserConversions*/ true);
} else if (FunctionTemplateDecl *Tmpl =
dyn_cast<FunctionTemplateDecl>(Cand->getUnderlyingDecl())) {
if (SM == CXXSpecialMemberKind::CopyAssignment ||
SM == CXXSpecialMemberKind::MoveAssignment)
AddMethodTemplateCandidate(Tmpl, Cand, RD, nullptr, ThisTy,
Classification,
llvm::ArrayRef(&Arg, NumArgs), OCS, true);
else if (CtorInfo)
AddTemplateOverloadCandidate(CtorInfo.ConstructorTmpl,
CtorInfo.FoundDecl, nullptr,
llvm::ArrayRef(&Arg, NumArgs), OCS, true);
else
AddTemplateOverloadCandidate(Tmpl, Cand, nullptr,
llvm::ArrayRef(&Arg, NumArgs), OCS, true);
} else {
assert(isa<UsingDecl>(Cand.getDecl()) &&
"illegal Kind of operator = Decl");
}
}
OverloadCandidateSet::iterator Best;
switch (OCS.BestViableFunction(*this, LookupLoc, Best)) {
case OR_Success:
Result->setMethod(cast<CXXMethodDecl>(Best->Function));
Result->setKind(SpecialMemberOverloadResult::Success);
break;
case OR_Deleted:
Result->setMethod(cast<CXXMethodDecl>(Best->Function));
Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted);
break;
case OR_Ambiguous:
Result->setMethod(nullptr);
Result->setKind(SpecialMemberOverloadResult::Ambiguous);
break;
case OR_No_Viable_Function:
Result->setMethod(nullptr);
Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted);
break;
}
return *Result;
}
CXXConstructorDecl *Sema::LookupDefaultConstructor(CXXRecordDecl *Class) {
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXSpecialMemberKind::DefaultConstructor,
false, false, false, false, false);
return cast_or_null<CXXConstructorDecl>(Result.getMethod());
}
CXXConstructorDecl *Sema::LookupCopyingConstructor(CXXRecordDecl *Class,
unsigned Quals) {
assert(!(Quals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy ctor arg");
SpecialMemberOverloadResult Result = LookupSpecialMember(
Class, CXXSpecialMemberKind::CopyConstructor, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, false, false, false);
return cast_or_null<CXXConstructorDecl>(Result.getMethod());
}
CXXConstructorDecl *Sema::LookupMovingConstructor(CXXRecordDecl *Class,
unsigned Quals) {
SpecialMemberOverloadResult Result = LookupSpecialMember(
Class, CXXSpecialMemberKind::MoveConstructor, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, false, false, false);
return cast_or_null<CXXConstructorDecl>(Result.getMethod());
}
DeclContext::lookup_result Sema::LookupConstructors(CXXRecordDecl *Class) {
// If the implicit constructors have not yet been declared, do so now.
if (CanDeclareSpecialMemberFunction(Class)) {
runWithSufficientStackSpace(Class->getLocation(), [&] {
if (Class->needsImplicitDefaultConstructor())
DeclareImplicitDefaultConstructor(Class);
if (Class->needsImplicitCopyConstructor())
DeclareImplicitCopyConstructor(Class);
if (getLangOpts().CPlusPlus11 && Class->needsImplicitMoveConstructor())
DeclareImplicitMoveConstructor(Class);
});
}
CanQualType T = Context.getCanonicalType(Context.getTypeDeclType(Class));
DeclarationName Name = Context.DeclarationNames.getCXXConstructorName(T);
return Class->lookup(Name);
}
CXXMethodDecl *Sema::LookupCopyingAssignment(CXXRecordDecl *Class,
unsigned Quals, bool RValueThis,
unsigned ThisQuals) {
assert(!(Quals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy assignment arg");
assert(!(ThisQuals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy assignment this");
SpecialMemberOverloadResult Result = LookupSpecialMember(
Class, CXXSpecialMemberKind::CopyAssignment, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, RValueThis, ThisQuals & Qualifiers::Const,
ThisQuals & Qualifiers::Volatile);
return Result.getMethod();
}
CXXMethodDecl *Sema::LookupMovingAssignment(CXXRecordDecl *Class,
unsigned Quals,
bool RValueThis,
unsigned ThisQuals) {
assert(!(ThisQuals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy assignment this");
SpecialMemberOverloadResult Result = LookupSpecialMember(
Class, CXXSpecialMemberKind::MoveAssignment, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, RValueThis, ThisQuals & Qualifiers::Const,
ThisQuals & Qualifiers::Volatile);
return Result.getMethod();
}
CXXDestructorDecl *Sema::LookupDestructor(CXXRecordDecl *Class) {
return cast_or_null<CXXDestructorDecl>(
LookupSpecialMember(Class, CXXSpecialMemberKind::Destructor, false, false,
false, false, false)
.getMethod());
}
Sema::LiteralOperatorLookupResult
Sema::LookupLiteralOperator(Scope *S, LookupResult &R,
ArrayRef<QualType> ArgTys, bool AllowRaw,
bool AllowTemplate, bool AllowStringTemplatePack,
bool DiagnoseMissing, StringLiteral *StringLit) {
LookupName(R, S);
assert(R.getResultKind() != LookupResult::Ambiguous &&
"literal operator lookup can't be ambiguous");
// Filter the lookup results appropriately.
LookupResult::Filter F = R.makeFilter();
bool AllowCooked = true;
bool FoundRaw = false;
bool FoundTemplate = false;
bool FoundStringTemplatePack = false;
bool FoundCooked = false;
while (F.hasNext()) {
Decl *D = F.next();
if (UsingShadowDecl *USD = dyn_cast<UsingShadowDecl>(D))
D = USD->getTargetDecl();
// If the declaration we found is invalid, skip it.
if (D->isInvalidDecl()) {
F.erase();
continue;
}
bool IsRaw = false;
bool IsTemplate = false;
bool IsStringTemplatePack = false;
bool IsCooked = false;
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->getNumParams() == 1 &&
FD->getParamDecl(0)->getType()->getAs<PointerType>())
IsRaw = true;
else if (FD->getNumParams() == ArgTys.size()) {
IsCooked = true;
for (unsigned ArgIdx = 0; ArgIdx != ArgTys.size(); ++ArgIdx) {
QualType ParamTy = FD->getParamDecl(ArgIdx)->getType();
if (!Context.hasSameUnqualifiedType(ArgTys[ArgIdx], ParamTy)) {
IsCooked = false;
break;
}
}
}
}
if (FunctionTemplateDecl *FD = dyn_cast<FunctionTemplateDecl>(D)) {
TemplateParameterList *Params = FD->getTemplateParameters();
if (Params->size() == 1) {
IsTemplate = true;
if (!Params->getParam(0)->isTemplateParameterPack() && !StringLit) {
// Implied but not stated: user-defined integer and floating literals
// only ever use numeric literal operator templates, not templates
// taking a parameter of class type.
F.erase();
continue;
}
// A string literal template is only considered if the string literal
// is a well-formed template argument for the template parameter.
if (StringLit) {
SFINAETrap Trap(*this);
SmallVector<TemplateArgument, 1> SugaredChecked, CanonicalChecked;
TemplateArgumentLoc Arg(TemplateArgument(StringLit), StringLit);
if (CheckTemplateArgument(
Params->getParam(0), Arg, FD, R.getNameLoc(), R.getNameLoc(),
0, SugaredChecked, CanonicalChecked, CTAK_Specified) ||
Trap.hasErrorOccurred())
IsTemplate = false;
}
} else {
IsStringTemplatePack = true;
}
}
if (AllowTemplate && StringLit && IsTemplate) {
FoundTemplate = true;
AllowRaw = false;
AllowCooked = false;
AllowStringTemplatePack = false;
if (FoundRaw || FoundCooked || FoundStringTemplatePack) {
F.restart();
FoundRaw = FoundCooked = FoundStringTemplatePack = false;
}
} else if (AllowCooked && IsCooked) {
FoundCooked = true;
AllowRaw = false;
AllowTemplate = StringLit;
AllowStringTemplatePack = false;
if (FoundRaw || FoundTemplate || FoundStringTemplatePack) {
// Go through again and remove the raw and template decls we've
// already found.
F.restart();
FoundRaw = FoundTemplate = FoundStringTemplatePack = false;
}
} else if (AllowRaw && IsRaw) {
FoundRaw = true;
} else if (AllowTemplate && IsTemplate) {
FoundTemplate = true;
} else if (AllowStringTemplatePack && IsStringTemplatePack) {
FoundStringTemplatePack = true;
} else {
F.erase();
}
}
F.done();
// Per C++20 [lex.ext]p5, we prefer the template form over the non-template
// form for string literal operator templates.
if (StringLit && FoundTemplate)
return LOLR_Template;
// C++11 [lex.ext]p3, p4: If S contains a literal operator with a matching
// parameter type, that is used in preference to a raw literal operator
// or literal operator template.
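// E.g. given both
//   unsigned long long operator""_w(unsigned long long);  // cooked
//   unsigned long long operator""_w(const char *);        // raw
// the literal 123_w selects the cooked form.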
if (FoundCooked)
return LOLR_Cooked;
// C++11 [lex.ext]p3, p4: S shall contain a raw literal operator or a literal
// operator template, but not both.
if (FoundRaw && FoundTemplate) {
Diag(R.getNameLoc(), diag::err_ovl_ambiguous_call) << R.getLookupName();
for (const NamedDecl *D : R)
NoteOverloadCandidate(D, D->getUnderlyingDecl()->getAsFunction());
return LOLR_Error;
}
if (FoundRaw)
return LOLR_Raw;
if (FoundTemplate)
return LOLR_Template;
if (FoundStringTemplatePack)
return LOLR_StringTemplatePack;
// Didn't find anything we could use.
if (DiagnoseMissing) {
Diag(R.getNameLoc(), diag::err_ovl_no_viable_literal_operator)
<< R.getLookupName() << (int)ArgTys.size() << ArgTys[0]
<< (ArgTys.size() == 2 ? ArgTys[1] : QualType()) << AllowRaw
<< (AllowTemplate || AllowStringTemplatePack);
return LOLR_Error;
}
return LOLR_ErrorNoDiagnostic;
}
void ADLResult::insert(NamedDecl *New) {
NamedDecl *&Old = Decls[cast<NamedDecl>(New->getCanonicalDecl())];
// If we haven't yet seen a decl for this key, or the last decl
// was exactly this one, we're done.
if (Old == nullptr || Old == New) {
Old = New;
return;
}
// Otherwise, decide which is a more recent redeclaration.
FunctionDecl *OldFD = Old->getAsFunction();
FunctionDecl *NewFD = New->getAsFunction();
FunctionDecl *Cursor = NewFD;
while (true) {
Cursor = Cursor->getPreviousDecl();
// If we got to the end without finding OldFD, OldFD is the newer
// declaration; leave things as they are.
if (!Cursor) return;
// If we do find OldFD, then NewFD is newer.
if (Cursor == OldFD) break;
// Otherwise, keep looking.
}
Old = New;
}
void Sema::ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc,
ArrayRef<Expr *> Args, ADLResult &Result) {
// Find all of the associated namespaces and classes based on the
// arguments we have.
AssociatedNamespaceSet AssociatedNamespaces;
AssociatedClassSet AssociatedClasses;
FindAssociatedClassesAndNamespaces(Loc, Args,
AssociatedNamespaces,
AssociatedClasses);
// C++ [basic.lookup.argdep]p3:
// Let X be the lookup set produced by unqualified lookup (3.4.1)
// and let Y be the lookup set produced by argument dependent
// lookup (defined as follows). If X contains [...] then Y is
// empty. Otherwise Y is the set of declarations found in the
// namespaces associated with the argument types as described
// below. The set of declarations found by the lookup of the name
// is the union of X and Y.
//
// Here, we compute Y and add its members to the overloaded
// candidate set.
for (auto *NS : AssociatedNamespaces) {
// When considering an associated namespace, the lookup is the
// same as the lookup performed when the associated namespace is
// used as a qualifier (3.4.3.2) except that:
//
// -- Any using-directives in the associated namespace are
// ignored.
//
// -- Any namespace-scope friend functions declared in
// associated classes are visible within their respective
// namespaces even if they are not visible during an ordinary
// lookup (11.4).
//
// C++20 [basic.lookup.argdep] p4.3
// -- are exported, are attached to a named module M, do not appear
// in the translation unit containing the point of the lookup, and
// have the same innermost enclosing non-inline namespace scope as
// a declaration of an associated entity attached to M.
DeclContext::lookup_result R = NS->lookup(Name);
for (auto *D : R) {
auto *Underlying = D;
if (auto *USD = dyn_cast<UsingShadowDecl>(D))
Underlying = USD->getTargetDecl();
if (!isa<FunctionDecl>(Underlying) &&
!isa<FunctionTemplateDecl>(Underlying))
continue;
// The declaration is visible to argument-dependent lookup if either
// it's ordinarily visible or declared as a friend in an associated
// class.
bool Visible = false;
for (D = D->getMostRecentDecl(); D;
D = cast_or_null<NamedDecl>(D->getPreviousDecl())) {
if (D->getIdentifierNamespace() & Decl::IDNS_Ordinary) {
if (isVisible(D)) {
Visible = true;
break;
}
if (!getLangOpts().CPlusPlusModules)
continue;
if (D->isInExportDeclContext()) {
Module *FM = D->getOwningModule();
// C++20 [basic.lookup.argdep] p4.3 .. are exported ...
// exports are only valid in module purview and outside of any
// PMF (although a PMF should not even be present in a module
// with an import).
assert(FM && FM->isNamedModule() && !FM->isPrivateModule() &&
"bad export context");
// .. are attached to a named module M, do not appear in the
// translation unit containing the point of the lookup..
if (D->isInAnotherModuleUnit() &&
llvm::any_of(AssociatedClasses, [&](auto *E) {
// ... and have the same innermost enclosing non-inline
// namespace scope as a declaration of an associated entity
// attached to M
if (E->getOwningModule() != FM)
return false;
// TODO: maybe this could be cached when generating the
// associated namespaces / entities.
DeclContext *Ctx = E->getDeclContext();
while (!Ctx->isFileContext() || Ctx->isInlineNamespace())
Ctx = Ctx->getParent();
return Ctx == NS;
})) {
Visible = true;
break;
}
}
} else if (D->getFriendObjectKind()) {
auto *RD = cast<CXXRecordDecl>(D->getLexicalDeclContext());
// [basic.lookup.argdep]p4:
// Argument-dependent lookup finds all declarations of functions and
// function templates that
// - ...
// - are declared as a friend ([class.friend]) of any class with a
// reachable definition in the set of associated entities,
//
// FIXME: If there's a merged definition of D that is reachable, then
// the friend declaration should be considered.
if (AssociatedClasses.count(RD) && isReachable(D)) {
Visible = true;
break;
}
}
}
// FIXME: Preserve D as the FoundDecl.
if (Visible)
Result.insert(Underlying);
}
}
}
//----------------------------------------------------------------------------
// Search for all visible declarations.
//----------------------------------------------------------------------------
VisibleDeclConsumer::~VisibleDeclConsumer() { }
bool VisibleDeclConsumer::includeHiddenDecls() const { return false; }
namespace {
class ShadowContextRAII;
class VisibleDeclsRecord {
public:
/// An entry in the shadow map, which is optimized to store a
/// single declaration (the common case) but can also store a list
/// of declarations.
typedef llvm::TinyPtrVector<NamedDecl*> ShadowMapEntry;
private:
/// A mapping from declaration names to the declarations that have
/// this name within a particular scope.
typedef llvm::DenseMap<DeclarationName, ShadowMapEntry> ShadowMap;
/// A list of shadow maps, which is used to model name hiding.
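/// E.g. (illustrative) when an inner scope's `int x` has already been
/// recorded and a different `x` from an outer scope is visited later,
/// checkHidden() reports the outer `x` as hidden by the inner one.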
std::list<ShadowMap> ShadowMaps;
/// The declaration contexts we have already visited.
llvm::SmallPtrSet<DeclContext *, 8> VisitedContexts;
friend class ShadowContextRAII;
public:
/// Determine whether we have already visited this context
/// (and, if not, note that we are going to visit that context now).
bool visitedContext(DeclContext *Ctx) {
return !VisitedContexts.insert(Ctx).second;
}
bool alreadyVisitedContext(DeclContext *Ctx) {
return VisitedContexts.count(Ctx);
}
/// Determine whether the given declaration is hidden in the
/// current scope.
///
/// \returns the declaration that hides the given declaration, or
/// NULL if no such declaration exists.
NamedDecl *checkHidden(NamedDecl *ND);
/// Add a declaration to the current shadow map.
void add(NamedDecl *ND) {
ShadowMaps.back()[ND->getDeclName()].push_back(ND);
}
};
/// RAII object that records when we've entered a shadow context.
class ShadowContextRAII {
VisibleDeclsRecord &Visible;
typedef VisibleDeclsRecord::ShadowMap ShadowMap;
public:
ShadowContextRAII(VisibleDeclsRecord &Visible) : Visible(Visible) {
Visible.ShadowMaps.emplace_back();
}
~ShadowContextRAII() {
Visible.ShadowMaps.pop_back();
}
};
} // end anonymous namespace
NamedDecl *VisibleDeclsRecord::checkHidden(NamedDecl *ND) {
unsigned IDNS = ND->getIdentifierNamespace();
std::list<ShadowMap>::reverse_iterator SM = ShadowMaps.rbegin();
for (std::list<ShadowMap>::reverse_iterator SMEnd = ShadowMaps.rend();
SM != SMEnd; ++SM) {
ShadowMap::iterator Pos = SM->find(ND->getDeclName());
if (Pos == SM->end())
continue;
for (auto *D : Pos->second) {
// A tag declaration does not hide a non-tag declaration.
if (D->hasTagIdentifierNamespace() &&
(IDNS & (Decl::IDNS_Member | Decl::IDNS_Ordinary |
Decl::IDNS_ObjCProtocol)))
continue;
// Protocols are in distinct namespaces from everything else.
if (((D->getIdentifierNamespace() & Decl::IDNS_ObjCProtocol)
|| (IDNS & Decl::IDNS_ObjCProtocol)) &&
D->getIdentifierNamespace() != IDNS)
continue;
// Functions and function templates in the same scope overload
// rather than hide. FIXME: Look for hiding based on function
// signatures!
if (D->getUnderlyingDecl()->isFunctionOrFunctionTemplate() &&
ND->getUnderlyingDecl()->isFunctionOrFunctionTemplate() &&
SM == ShadowMaps.rbegin())
continue;
// A shadow declaration that's created by a resolved using declaration
// is not hidden by the same using declaration.
if (isa<UsingShadowDecl>(ND) && isa<UsingDecl>(D) &&
cast<UsingShadowDecl>(ND)->getIntroducer() == D)
continue;
// We've found a declaration that hides this one.
return D;
}
}
return nullptr;
}
namespace {
class LookupVisibleHelper {
public:
LookupVisibleHelper(VisibleDeclConsumer &Consumer, bool IncludeDependentBases,
bool LoadExternal)
: Consumer(Consumer), IncludeDependentBases(IncludeDependentBases),
LoadExternal(LoadExternal) {}
void lookupVisibleDecls(Sema &SemaRef, Scope *S, Sema::LookupNameKind Kind,
bool IncludeGlobalScope) {
// Determine the set of using directives available during
// unqualified name lookup.
Scope *Initial = S;
UnqualUsingDirectiveSet UDirs(SemaRef);
if (SemaRef.getLangOpts().CPlusPlus) {
// Find the first namespace or translation-unit scope.
while (S && !isNamespaceOrTranslationUnitScope(S))
S = S->getParent();
UDirs.visitScopeChain(Initial, S);
}
UDirs.done();
// Look for visible declarations.
LookupResult Result(SemaRef, DeclarationName(), SourceLocation(), Kind);
Result.setAllowHidden(Consumer.includeHiddenDecls());
if (!IncludeGlobalScope)
Visited.visitedContext(SemaRef.getASTContext().getTranslationUnitDecl());
ShadowContextRAII Shadow(Visited);
lookupInScope(Initial, Result, UDirs);
}
void lookupVisibleDecls(Sema &SemaRef, DeclContext *Ctx,
Sema::LookupNameKind Kind, bool IncludeGlobalScope) {
LookupResult Result(SemaRef, DeclarationName(), SourceLocation(), Kind);
Result.setAllowHidden(Consumer.includeHiddenDecls());
if (!IncludeGlobalScope)
Visited.visitedContext(SemaRef.getASTContext().getTranslationUnitDecl());
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(Ctx, Result, /*QualifiedNameLookup=*/true,
/*InBaseClass=*/false);
}
private:
void lookupInDeclContext(DeclContext *Ctx, LookupResult &Result,
bool QualifiedNameLookup, bool InBaseClass) {
if (!Ctx)
return;
// Make sure we don't visit the same context twice.
if (Visited.visitedContext(Ctx->getPrimaryContext()))
return;
Consumer.EnteredContext(Ctx);
// Outside C++, lookup results for the TU live on identifiers.
if (isa<TranslationUnitDecl>(Ctx) &&
!Result.getSema().getLangOpts().CPlusPlus) {
auto &S = Result.getSema();
auto &Idents = S.Context.Idents;
// Ensure all external identifiers are in the identifier table.
if (LoadExternal)
if (IdentifierInfoLookup *External =
Idents.getExternalIdentifierLookup()) {
std::unique_ptr<IdentifierIterator> Iter(External->getIdentifiers());
for (StringRef Name = Iter->Next(); !Name.empty();
Name = Iter->Next())
Idents.get(Name);
}
// Walk all lookup results in the TU for each identifier.
for (const auto &Ident : Idents) {
for (auto I = S.IdResolver.begin(Ident.getValue()),
E = S.IdResolver.end();
I != E; ++I) {
if (S.IdResolver.isDeclInScope(*I, Ctx)) {
if (NamedDecl *ND = Result.getAcceptableDecl(*I)) {
Consumer.FoundDecl(ND, Visited.checkHidden(ND), Ctx, InBaseClass);
Visited.add(ND);
}
}
}
}
return;
}
if (CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(Ctx))
Result.getSema().ForceDeclarationOfImplicitMembers(Class);
llvm::SmallVector<NamedDecl *, 4> DeclsToVisit;
// We sometimes skip loading namespace-level results (they tend to be huge).
bool Load = LoadExternal ||
!(isa<TranslationUnitDecl>(Ctx) || isa<NamespaceDecl>(Ctx));
// Enumerate all of the results in this context.
for (DeclContextLookupResult R :
Load ? Ctx->lookups()
: Ctx->noload_lookups(/*PreserveInternalState=*/false))
for (auto *D : R)
// Rather than visit D immediately, we put it into a vector and visit
// all decls, in order, outside of this loop. The reason is that
// Consumer.FoundDecl() and LookupResult::getAcceptableDecl(D)
// may invalidate the iterators used in the two loops above.
DeclsToVisit.push_back(D);
for (auto *D : DeclsToVisit)
if (auto *ND = Result.getAcceptableDecl(D)) {
Consumer.FoundDecl(ND, Visited.checkHidden(ND), Ctx, InBaseClass);
Visited.add(ND);
}
DeclsToVisit.clear();
// Traverse using directives for qualified name lookup.
if (QualifiedNameLookup) {
ShadowContextRAII Shadow(Visited);
for (auto *I : Ctx->using_directives()) {
if (!Result.getSema().isVisible(I))
continue;
lookupInDeclContext(I->getNominatedNamespace(), Result,
QualifiedNameLookup, InBaseClass);
}
}
// Traverse the contexts of inherited C++ classes.
if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(Ctx)) {
if (!Record->hasDefinition())
return;
for (const auto &B : Record->bases()) {
QualType BaseType = B.getType();
RecordDecl *RD;
if (BaseType->isDependentType()) {
if (!IncludeDependentBases) {
// Don't look into dependent bases, because name lookup can't look
// there anyway.
continue;
}
const auto *TST = BaseType->getAs<TemplateSpecializationType>();
if (!TST)
continue;
TemplateName TN = TST->getTemplateName();
const auto *TD =
dyn_cast_or_null<ClassTemplateDecl>(TN.getAsTemplateDecl());
if (!TD)
continue;
RD = TD->getTemplatedDecl();
} else {
const auto *Record = BaseType->getAs<RecordType>();
if (!Record)
continue;
RD = Record->getDecl();
}
// FIXME: It would be nice to be able to determine whether referencing
// a particular member would be ambiguous. For example, given
//
// struct A { int member; };
// struct B { int member; };
// struct C : A, B { };
//
// void f(C *c) { c->### }
//
// accessing 'member' would result in an ambiguity. However, we
// could be smart enough to qualify the member with the base
// class, e.g.,
//
// c->B::member
//
// or
//
// c->A::member
// Find results in this base class (and its bases).
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(RD, Result, QualifiedNameLookup,
/*InBaseClass=*/true);
}
}
// Traverse the contexts of Objective-C classes.
if (ObjCInterfaceDecl *IFace = dyn_cast<ObjCInterfaceDecl>(Ctx)) {
// Traverse categories.
for (auto *Cat : IFace->visible_categories()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(Cat, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
// Traverse protocols.
for (auto *I : IFace->all_referenced_protocols()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(I, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
// Traverse the superclass.
if (IFace->getSuperClass()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(IFace->getSuperClass(), Result, QualifiedNameLookup,
/*InBaseClass=*/true);
}
// If there is an implementation, traverse it. We do this to find
// synthesized ivars.
if (IFace->getImplementation()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(IFace->getImplementation(), Result,
QualifiedNameLookup, InBaseClass);
}
} else if (ObjCProtocolDecl *Protocol = dyn_cast<ObjCProtocolDecl>(Ctx)) {
for (auto *I : Protocol->protocols()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(I, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
} else if (ObjCCategoryDecl *Category = dyn_cast<ObjCCategoryDecl>(Ctx)) {
for (auto *I : Category->protocols()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(I, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
// If there is an implementation, traverse it.
if (Category->getImplementation()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(Category->getImplementation(), Result,
QualifiedNameLookup, /*InBaseClass=*/true);
}
}
}
void lookupInScope(Scope *S, LookupResult &Result,
UnqualUsingDirectiveSet &UDirs) {
// No clients run in this mode and it's not supported. Please add tests and
// remove the assertion if you start relying on it.
assert(!IncludeDependentBases && "Unsupported flag for lookupInScope");
if (!S)
return;
if (!S->getEntity() ||
(!S->getParent() && !Visited.alreadyVisitedContext(S->getEntity())) ||
(S->getEntity())->isFunctionOrMethod()) {
FindLocalExternScope FindLocals(Result);
// Walk through the declarations in this Scope. The consumer might add new
// decls to the scope as part of deserialization, so make a copy first.
SmallVector<Decl *, 8> ScopeDecls(S->decls().begin(), S->decls().end());
for (Decl *D : ScopeDecls) {
if (NamedDecl *ND = dyn_cast<NamedDecl>(D))
if ((ND = Result.getAcceptableDecl(ND))) {
Consumer.FoundDecl(ND, Visited.checkHidden(ND), nullptr, false);
Visited.add(ND);
}
}
}
DeclContext *Entity = S->getLookupEntity();
if (Entity) {
// Look into this scope's declaration context, along with any of its
// parent lookup contexts (e.g., enclosing classes), up to the point
// where we hit the context stored in the next outer scope.
DeclContext *OuterCtx = findOuterContext(S);
for (DeclContext *Ctx = Entity; Ctx && !Ctx->Equals(OuterCtx);
Ctx = Ctx->getLookupParent()) {
if (ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(Ctx)) {
if (Method->isInstanceMethod()) {
// For instance methods, look for ivars in the method's interface.
LookupResult IvarResult(Result.getSema(), Result.getLookupName(),
Result.getNameLoc(),
Sema::LookupMemberName);
if (ObjCInterfaceDecl *IFace = Method->getClassInterface()) {
lookupInDeclContext(IFace, IvarResult,
/*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
}
// We've already performed all of the name lookup that we need
// to for Objective-C methods; the next context will be the
// outer scope.
break;
}
if (Ctx->isFunctionOrMethod())
continue;
lookupInDeclContext(Ctx, Result, /*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
} else if (!S->getParent()) {
// Look into the translation unit scope. We walk through the translation
// unit's declaration context, because the Scope itself won't have all of
// the declarations if we loaded a precompiled header.
// FIXME: We would like the translation unit's Scope object to point to
// the translation unit, so we don't need this special "if" branch.
// However, doing so would force the normal C++ name-lookup code to look
// into the translation unit decl when the IdentifierInfo chains would
// suffice. Once we fix that problem (which is part of a more general
// "don't look in DeclContexts unless we have to" optimization), we can
// eliminate this.
Entity = Result.getSema().Context.getTranslationUnitDecl();
lookupInDeclContext(Entity, Result, /*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
if (Entity) {
// Lookup visible declarations in any namespaces found by using
// directives.
for (const UnqualUsingEntry &UUE : UDirs.getNamespacesFor(Entity))
lookupInDeclContext(
const_cast<DeclContext *>(UUE.getNominatedNamespace()), Result,
/*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
// Lookup names in the parent scope.
ShadowContextRAII Shadow(Visited);
lookupInScope(S->getParent(), Result, UDirs);
}
private:
VisibleDeclsRecord Visited;
VisibleDeclConsumer &Consumer;
bool IncludeDependentBases;
bool LoadExternal;
};
} // namespace
void Sema::LookupVisibleDecls(Scope *S, LookupNameKind Kind,
VisibleDeclConsumer &Consumer,
bool IncludeGlobalScope, bool LoadExternal) {
LookupVisibleHelper H(Consumer, /*IncludeDependentBases=*/false,
LoadExternal);
H.lookupVisibleDecls(*this, S, Kind, IncludeGlobalScope);
}
void Sema::LookupVisibleDecls(DeclContext *Ctx, LookupNameKind Kind,
VisibleDeclConsumer &Consumer,
bool IncludeGlobalScope,
bool IncludeDependentBases, bool LoadExternal) {
LookupVisibleHelper H(Consumer, IncludeDependentBases, LoadExternal);
H.lookupVisibleDecls(*this, Ctx, Kind, IncludeGlobalScope);
}
LabelDecl *Sema::LookupOrCreateLabel(IdentifierInfo *II, SourceLocation Loc,
SourceLocation GnuLabelLoc) {
// Do a lookup to see if we have a label with this name already.
NamedDecl *Res = nullptr;
if (GnuLabelLoc.isValid()) {
// Local label definitions always shadow existing labels.
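// E.g. (illustrative) a GNU `__label__ retry;` declaration at block scope
// introduces a fresh `retry` label even if an enclosing scope already
// defines one.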
Res = LabelDecl::Create(Context, CurContext, Loc, II, GnuLabelLoc);
Scope *S = CurScope;
PushOnScopeChains(Res, S, true);
return cast<LabelDecl>(Res);
}
// Not a GNU local label.
Res = LookupSingleName(CurScope, II, Loc, LookupLabel,
RedeclarationKind::NotForRedeclaration);
// If we found a label, check to see if it is in the same context as us.
// When in a Block, we don't want to reuse a label in an enclosing function.
if (Res && Res->getDeclContext() != CurContext)
Res = nullptr;
if (!Res) {
// If not forward referenced or defined already, create the backing decl.
Res = LabelDecl::Create(Context, CurContext, Loc, II);
Scope *S = CurScope->getFnParent();
assert(S && "Not in a function?");
PushOnScopeChains(Res, S, true);
}
return cast<LabelDecl>(Res);
}
//===----------------------------------------------------------------------===//
// Typo correction
//===----------------------------------------------------------------------===//
static bool isCandidateViable(CorrectionCandidateCallback &CCC,
TypoCorrection &Candidate) {
Candidate.setCallbackDistance(CCC.RankCandidate(Candidate));
return Candidate.getEditDistance(false) != TypoCorrection::InvalidDistance;
}
static void LookupPotentialTypoResult(Sema &SemaRef,
LookupResult &Res,
IdentifierInfo *Name,
Scope *S, CXXScopeSpec *SS,
DeclContext *MemberContext,
bool EnteringContext,
bool isObjCIvarLookup,
bool FindHidden);
/// Check whether the declarations found for a typo correction are
/// visible. Set the correction's RequiresImport flag to true if none of the
/// declarations are visible, false otherwise.
static void checkCorrectionVisibility(Sema &SemaRef, TypoCorrection &TC) {
TypoCorrection::decl_iterator DI = TC.begin(), DE = TC.end();
for (/**/; DI != DE; ++DI)
if (!LookupResult::isVisible(SemaRef, *DI))
break;
// No filtering needed if all decls are visible.
if (DI == DE) {
TC.setRequiresImport(false);
return;
}
llvm::SmallVector<NamedDecl*, 4> NewDecls(TC.begin(), DI);
bool AnyVisibleDecls = !NewDecls.empty();
for (/**/; DI != DE; ++DI) {
if (LookupResult::isVisible(SemaRef, *DI)) {
if (!AnyVisibleDecls) {
// Found a visible decl, discard all hidden ones.
AnyVisibleDecls = true;
NewDecls.clear();
}
NewDecls.push_back(*DI);
} else if (!AnyVisibleDecls && !(*DI)->isModulePrivate())
NewDecls.push_back(*DI);
}
if (NewDecls.empty())
TC = TypoCorrection();
else {
TC.setCorrectionDecls(NewDecls);
TC.setRequiresImport(!AnyVisibleDecls);
}
}
// Fill the supplied vector with the IdentifierInfo pointers for each piece of
// the given NestedNameSpecifier (e.g. given a NestedNameSpecifier "foo::bar::",
// fill the vector with the IdentifierInfo pointers for "foo" and "bar").
static void getNestedNameSpecifierIdentifiers(
NestedNameSpecifier *NNS,
SmallVectorImpl<const IdentifierInfo*> &Identifiers) {
if (NestedNameSpecifier *Prefix = NNS->getPrefix())
getNestedNameSpecifierIdentifiers(Prefix, Identifiers);
else
Identifiers.clear();
const IdentifierInfo *II = nullptr;
switch (NNS->getKind()) {
case NestedNameSpecifier::Identifier:
II = NNS->getAsIdentifier();
break;
case NestedNameSpecifier::Namespace:
if (NNS->getAsNamespace()->isAnonymousNamespace())
return;
II = NNS->getAsNamespace()->getIdentifier();
break;
case NestedNameSpecifier::NamespaceAlias:
II = NNS->getAsNamespaceAlias()->getIdentifier();
break;
case NestedNameSpecifier::TypeSpecWithTemplate:
case NestedNameSpecifier::TypeSpec:
II = QualType(NNS->getAsType(), 0).getBaseTypeIdentifier();
break;
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
return;
}
if (II)
Identifiers.push_back(II);
}
void TypoCorrectionConsumer::FoundDecl(NamedDecl *ND, NamedDecl *Hiding,
DeclContext *Ctx, bool InBaseClass) {
// Don't consider hidden names for typo correction.
if (Hiding)
return;
// Only consider entities with identifiers for names, ignoring
// special names (constructors, overloaded operators, selectors,
// etc.).
IdentifierInfo *Name = ND->getIdentifier();
if (!Name)
return;
// Only consider visible declarations and declarations from modules with
// names that exactly match.
if (!LookupResult::isVisible(SemaRef, ND) && Name != Typo)
return;
FoundName(Name->getName());
}
void TypoCorrectionConsumer::FoundName(StringRef Name) {
// Compute the edit distance between the typo and the name of this
// entity, and add the identifier to the list of results.
addName(Name, nullptr);
}
void TypoCorrectionConsumer::addKeywordResult(StringRef Keyword) {
// Compute the edit distance between the typo and this keyword,
// and add the keyword to the list of results.
addName(Keyword, nullptr, nullptr, true);
}
void TypoCorrectionConsumer::addName(StringRef Name, NamedDecl *ND,
NestedNameSpecifier *NNS, bool isKeyword) {
// Use a simple length-based heuristic to determine the minimum possible
// edit distance. If the minimum isn't good enough, bail out early.
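// E.g. correcting the typo "foo" against "foobar": MinED is |6 - 3| == 3
// and 3/3 == 1 < 3, so the candidate is rejected before the full
// edit-distance computation runs.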
StringRef TypoStr = Typo->getName();
unsigned MinED = abs((int)Name.size() - (int)TypoStr.size());
if (MinED && TypoStr.size() / MinED < 3)
return;
// Compute an upper bound on the allowable edit distance, so that the
// edit-distance algorithm can short-circuit.
unsigned UpperBound = (TypoStr.size() + 2) / 3;
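// E.g. a nine-character typo admits candidates up to (9 + 2) / 3 == 3
// edits away.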
unsigned ED = TypoStr.edit_distance(Name, true, UpperBound);
if (ED > UpperBound) return;
TypoCorrection TC(&SemaRef.Context.Idents.get(Name), ND, NNS, ED);
if (isKeyword) TC.makeKeyword();
TC.setCorrectionRange(nullptr, Result.getLookupNameInfo());
addCorrection(TC);
}
static const unsigned MaxTypoDistanceResultSets = 5;
void TypoCorrectionConsumer::addCorrection(TypoCorrection Correction) {
StringRef TypoStr = Typo->getName();
StringRef Name = Correction.getCorrectionAsIdentifierInfo()->getName();
// For very short typos, ignore potential corrections that have a different
// base identifier from the typo or which have a normalized edit distance
// longer than the typo itself.
if (TypoStr.size() < 3 &&
(Name != TypoStr || Correction.getEditDistance(true) > TypoStr.size()))
return;
// If the correction is resolved but is not viable, ignore it.
if (Correction.isResolved()) {
checkCorrectionVisibility(SemaRef, Correction);
if (!Correction || !isCandidateViable(*CorrectionValidator, Correction))
return;
}
TypoResultList &CList =
CorrectionResults[Correction.getEditDistance(false)][Name];
if (!CList.empty() && !CList.back().isResolved())
CList.pop_back();
if (NamedDecl *NewND = Correction.getCorrectionDecl()) {
auto RI = llvm::find_if(CList, [NewND](const TypoCorrection &TypoCorr) {
return TypoCorr.getCorrectionDecl() == NewND;
});
if (RI != CList.end()) {
// The Correction refers to a decl already in the list. No insertion is
// necessary and all further cases will return.
auto IsDeprecated = [](Decl *D) {
while (D) {
if (D->isDeprecated())
return true;
D = llvm::dyn_cast_or_null<NamespaceDecl>(D->getDeclContext());
}
return false;
};
// Prefer non-deprecated corrections over deprecated ones, and only then
// sort alphabetically.
std::pair<bool, std::string> NewKey = {
IsDeprecated(Correction.getFoundDecl()),
Correction.getAsString(SemaRef.getLangOpts())};
std::pair<bool, std::string> PrevKey = {
IsDeprecated(RI->getFoundDecl()),
RI->getAsString(SemaRef.getLangOpts())};
if (NewKey < PrevKey)
*RI = Correction;
return;
}
}
if (CList.empty() || Correction.isResolved())
CList.push_back(Correction);
while (CorrectionResults.size() > MaxTypoDistanceResultSets)
CorrectionResults.erase(std::prev(CorrectionResults.end()));
}
void TypoCorrectionConsumer::addNamespaces(
const llvm::MapVector<NamespaceDecl *, bool> &KnownNamespaces) {
SearchNamespaces = true;
for (auto KNPair : KnownNamespaces)
Namespaces.addNameSpecifier(KNPair.first);
bool SSIsTemplate = false;
if (NestedNameSpecifier *NNS =
(SS && SS->isValid()) ? SS->getScopeRep() : nullptr) {
if (const Type *T = NNS->getAsType())
SSIsTemplate = T->getTypeClass() == Type::TemplateSpecialization;
}
// Do not transform this into an iterator-based loop. The loop body can
// trigger the creation of further types (through lazy deserialization) and
// invalidate iterators into this list.
auto &Types = SemaRef.getASTContext().getTypes();
for (unsigned I = 0; I != Types.size(); ++I) {
const auto *TI = Types[I];
if (CXXRecordDecl *CD = TI->getAsCXXRecordDecl()) {
CD = CD->getCanonicalDecl();
if (!CD->isDependentType() && !CD->isAnonymousStructOrUnion() &&
!CD->isUnion() && CD->getIdentifier() &&
(SSIsTemplate || !isa<ClassTemplateSpecializationDecl>(CD)) &&
(CD->isBeingDefined() || CD->isCompleteDefinition()))
Namespaces.addNameSpecifier(CD);
}
}
}
const TypoCorrection &TypoCorrectionConsumer::getNextCorrection() {
if (++CurrentTCIndex < ValidatedCorrections.size())
return ValidatedCorrections[CurrentTCIndex];
CurrentTCIndex = ValidatedCorrections.size();
while (!CorrectionResults.empty()) {
auto DI = CorrectionResults.begin();
if (DI->second.empty()) {
CorrectionResults.erase(DI);
continue;
}
auto RI = DI->second.begin();
if (RI->second.empty()) {
DI->second.erase(RI);
performQualifiedLookups();
continue;
}
TypoCorrection TC = RI->second.pop_back_val();
if (TC.isResolved() || TC.requiresImport() || resolveCorrection(TC)) {
ValidatedCorrections.push_back(TC);
return ValidatedCorrections[CurrentTCIndex];
}
}
return ValidatedCorrections[0]; // The empty correction.
}
bool TypoCorrectionConsumer::resolveCorrection(TypoCorrection &Candidate) {
IdentifierInfo *Name = Candidate.getCorrectionAsIdentifierInfo();
DeclContext *TempMemberContext = MemberContext;
CXXScopeSpec *TempSS = SS.get();
retry_lookup:
LookupPotentialTypoResult(SemaRef, Result, Name, S, TempSS, TempMemberContext,
EnteringContext,
CorrectionValidator->IsObjCIvarLookup,
Name == Typo && !Candidate.WillReplaceSpecifier());
switch (Result.getResultKind()) {
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::FoundUnresolvedValue:
if (TempSS) {
// Immediately retry the lookup without the given CXXScopeSpec
TempSS = nullptr;
Candidate.WillReplaceSpecifier(true);
goto retry_lookup;
}
if (TempMemberContext) {
if (SS && !TempSS)
TempSS = SS.get();
TempMemberContext = nullptr;
goto retry_lookup;
}
if (SearchNamespaces)
QualifiedResults.push_back(Candidate);
break;
case LookupResult::Ambiguous:
// We don't deal with ambiguities.
break;
case LookupResult::Found:
case LookupResult::FoundOverloaded:
// Store all of the Decls for overloaded symbols
for (auto *TRD : Result)
Candidate.addCorrectionDecl(TRD);
checkCorrectionVisibility(SemaRef, Candidate);
if (!isCandidateViable(*CorrectionValidator, Candidate)) {
if (SearchNamespaces)
QualifiedResults.push_back(Candidate);
break;
}
Candidate.setCorrectionRange(SS.get(), Result.getLookupNameInfo());
return true;
}
return false;
}
void TypoCorrectionConsumer::performQualifiedLookups() {
unsigned TypoLen = Typo->getName().size();
for (const TypoCorrection &QR : QualifiedResults) {
for (const auto &NSI : Namespaces) {
DeclContext *Ctx = NSI.DeclCtx;
const Type *NSType = NSI.NameSpecifier->getAsType();
// If the current NestedNameSpecifier refers to a class and the
// current correction candidate is the name of that class, then skip
// it as it is unlikely a qualified version of the class' constructor
// is an appropriate correction.
if (CXXRecordDecl *NSDecl = NSType ? NSType->getAsCXXRecordDecl() :
nullptr) {
if (NSDecl->getIdentifier() == QR.getCorrectionAsIdentifierInfo())
continue;
}
TypoCorrection TC(QR);
TC.ClearCorrectionDecls();
TC.setCorrectionSpecifier(NSI.NameSpecifier);
TC.setQualifierDistance(NSI.EditDistance);
TC.setCallbackDistance(0); // Reset the callback distance
// If the current correction candidate and namespace combination are
// too far away from the original typo based on the normalized edit
// distance, then skip performing a qualified name lookup.
unsigned TmpED = TC.getEditDistance(true);
if (QR.getCorrectionAsIdentifierInfo() != Typo && TmpED &&
TypoLen / TmpED < 3)
continue;
Result.clear();
Result.setLookupName(QR.getCorrectionAsIdentifierInfo());
if (!SemaRef.LookupQualifiedName(Result, Ctx))
continue;
// Any corrections added below will be validated in subsequent
// iterations of the main while() loop over the Consumer's contents.
switch (Result.getResultKind()) {
case LookupResult::Found:
case LookupResult::FoundOverloaded: {
if (SS && SS->isValid()) {
std::string NewQualified = TC.getAsString(SemaRef.getLangOpts());
std::string OldQualified;
llvm::raw_string_ostream OldOStream(OldQualified);
SS->getScopeRep()->print(OldOStream, SemaRef.getPrintingPolicy());
OldOStream << Typo->getName();
// If correction candidate would be an identical written qualified
// identifier, then the existing CXXScopeSpec probably included a
// typedef that didn't get accounted for properly.
if (OldOStream.str() == NewQualified)
break;
}
for (LookupResult::iterator TRD = Result.begin(), TRDEnd = Result.end();
TRD != TRDEnd; ++TRD) {
if (SemaRef.CheckMemberAccess(TC.getCorrectionRange().getBegin(),
NSType ? NSType->getAsCXXRecordDecl()
: nullptr,
TRD.getPair()) == Sema::AR_accessible)
TC.addCorrectionDecl(*TRD);
}
if (TC.isResolved()) {
TC.setCorrectionRange(SS.get(), Result.getLookupNameInfo());
addCorrection(TC);
}
break;
}
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::Ambiguous:
case LookupResult::FoundUnresolvedValue:
break;
}
}
}
QualifiedResults.clear();
}
TypoCorrectionConsumer::NamespaceSpecifierSet::NamespaceSpecifierSet(
ASTContext &Context, DeclContext *CurContext, CXXScopeSpec *CurScopeSpec)
: Context(Context), CurContextChain(buildContextChain(CurContext)) {
if (NestedNameSpecifier *NNS =
CurScopeSpec ? CurScopeSpec->getScopeRep() : nullptr) {
llvm::raw_string_ostream SpecifierOStream(CurNameSpecifier);
NNS->print(SpecifierOStream, Context.getPrintingPolicy());
getNestedNameSpecifierIdentifiers(NNS, CurNameSpecifierIdentifiers);
}
// Build the list of identifiers that would be used for an absolute
// (from the global context) NestedNameSpecifier referring to the current
// context.
for (DeclContext *C : llvm::reverse(CurContextChain)) {
if (auto *ND = dyn_cast_or_null<NamespaceDecl>(C))
CurContextIdentifiers.push_back(ND->getIdentifier());
}
// Add the global context as a NestedNameSpecifier
SpecifierInfo SI = {cast<DeclContext>(Context.getTranslationUnitDecl()),
NestedNameSpecifier::GlobalSpecifier(Context), 1};
DistanceMap[1].push_back(SI);
}
auto TypoCorrectionConsumer::NamespaceSpecifierSet::buildContextChain(
DeclContext *Start) -> DeclContextList {
assert(Start && "Building a context chain from a null context");
DeclContextList Chain;
for (DeclContext *DC = Start->getPrimaryContext(); DC != nullptr;
DC = DC->getLookupParent()) {
NamespaceDecl *ND = dyn_cast_or_null<NamespaceDecl>(DC);
if (!DC->isInlineNamespace() && !DC->isTransparentContext() &&
!(ND && ND->isAnonymousNamespace()))
Chain.push_back(DC->getPrimaryContext());
}
return Chain;
}
unsigned
TypoCorrectionConsumer::NamespaceSpecifierSet::buildNestedNameSpecifier(
DeclContextList &DeclChain, NestedNameSpecifier *&NNS) {
unsigned NumSpecifiers = 0;
for (DeclContext *C : llvm::reverse(DeclChain)) {
if (auto *ND = dyn_cast_or_null<NamespaceDecl>(C)) {
NNS = NestedNameSpecifier::Create(Context, NNS, ND);
++NumSpecifiers;
} else if (auto *RD = dyn_cast_or_null<RecordDecl>(C)) {
NNS = NestedNameSpecifier::Create(Context, NNS, RD->isTemplateDecl(),
RD->getTypeForDecl());
++NumSpecifiers;
}
}
return NumSpecifiers;
}
void TypoCorrectionConsumer::NamespaceSpecifierSet::addNameSpecifier(
DeclContext *Ctx) {
NestedNameSpecifier *NNS = nullptr;
unsigned NumSpecifiers = 0;
DeclContextList NamespaceDeclChain(buildContextChain(Ctx));
DeclContextList FullNamespaceDeclChain(NamespaceDeclChain);
// Eliminate common elements from the two DeclContext chains.
for (DeclContext *C : llvm::reverse(CurContextChain)) {
if (NamespaceDeclChain.empty() || NamespaceDeclChain.back() != C)
break;
NamespaceDeclChain.pop_back();
}
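// E.g. (illustrative) with CurContext a::b and Ctx a::c::d, the shared
// ancestor a is dropped here, so the specifier built below becomes
// "c::d".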
// Build the NestedNameSpecifier from what is left of the NamespaceDeclChain
NumSpecifiers = buildNestedNameSpecifier(NamespaceDeclChain, NNS);
// Add an explicit leading '::' specifier if needed.
if (NamespaceDeclChain.empty()) {
// Rebuild the NestedNameSpecifier as a globally-qualified specifier.
NNS = NestedNameSpecifier::GlobalSpecifier(Context);
NumSpecifiers =
buildNestedNameSpecifier(FullNamespaceDeclChain, NNS);
} else if (NamedDecl *ND =
dyn_cast_or_null<NamedDecl>(NamespaceDeclChain.back())) {
IdentifierInfo *Name = ND->getIdentifier();
bool SameNameSpecifier = false;
if (llvm::is_contained(CurNameSpecifierIdentifiers, Name)) {
std::string NewNameSpecifier;
llvm::raw_string_ostream SpecifierOStream(NewNameSpecifier);
SmallVector<const IdentifierInfo *, 4> NewNameSpecifierIdentifiers;
getNestedNameSpecifierIdentifiers(NNS, NewNameSpecifierIdentifiers);
NNS->print(SpecifierOStream, Context.getPrintingPolicy());
SpecifierOStream.flush();
SameNameSpecifier = NewNameSpecifier == CurNameSpecifier;
}
if (SameNameSpecifier || llvm::is_contained(CurContextIdentifiers, Name)) {
// Rebuild the NestedNameSpecifier as a globally-qualified specifier.
NNS = NestedNameSpecifier::GlobalSpecifier(Context);
NumSpecifiers =
buildNestedNameSpecifier(FullNamespaceDeclChain, NNS);
}
}
// If the built NestedNameSpecifier would be replacing an existing
// NestedNameSpecifier, use the number of component identifiers that
// would need to be changed as the edit distance instead of the number
// of components in the built NestedNameSpecifier.
if (NNS && !CurNameSpecifierIdentifiers.empty()) {
SmallVector<const IdentifierInfo*, 4> NewNameSpecifierIdentifiers;
getNestedNameSpecifierIdentifiers(NNS, NewNameSpecifierIdentifiers);
NumSpecifiers =
llvm::ComputeEditDistance(llvm::ArrayRef(CurNameSpecifierIdentifiers),
llvm::ArrayRef(NewNameSpecifierIdentifiers));
}
SpecifierInfo SI = {Ctx, NNS, NumSpecifiers};
DistanceMap[NumSpecifiers].push_back(SI);
}
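// Worked example for the edit-distance adjustment above: if the user wrote
// `foo::bar::X` and the suggested context prints as `foo::baz::`, the
// identifier-level distance between {foo, bar} and {foo, baz} is 1, so the
// correction is ranked as one changed component rather than two new ones.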
/// Perform name lookup for a possible result for typo correction.
static void LookupPotentialTypoResult(Sema &SemaRef,
LookupResult &Res,
IdentifierInfo *Name,
Scope *S, CXXScopeSpec *SS,
DeclContext *MemberContext,
bool EnteringContext,
bool isObjCIvarLookup,
bool FindHidden) {
Res.suppressDiagnostics();
Res.clear();
Res.setLookupName(Name);
Res.setAllowHidden(FindHidden);
if (MemberContext) {
if (ObjCInterfaceDecl *Class = dyn_cast<ObjCInterfaceDecl>(MemberContext)) {
if (isObjCIvarLookup) {
if (ObjCIvarDecl *Ivar = Class->lookupInstanceVariable(Name)) {
Res.addDecl(Ivar);
Res.resolveKind();
return;
}
}
if (ObjCPropertyDecl *Prop = Class->FindPropertyDeclaration(
Name, ObjCPropertyQueryKind::OBJC_PR_query_instance)) {
Res.addDecl(Prop);
Res.resolveKind();
return;
}
}
SemaRef.LookupQualifiedName(Res, MemberContext);
return;
}
SemaRef.LookupParsedName(Res, S, SS,
/*ObjectType=*/QualType(),
/*AllowBuiltinCreation=*/false, EnteringContext);
// Fake ivar lookup; this should really be part of
// LookupParsedName.
if (ObjCMethodDecl *Method = SemaRef.getCurMethodDecl()) {
if (Method->isInstanceMethod() && Method->getClassInterface() &&
(Res.empty() ||
(Res.isSingleResult() &&
Res.getFoundDecl()->isDefinedOutsideFunctionOrMethod()))) {
if (ObjCIvarDecl *IV
= Method->getClassInterface()->lookupInstanceVariable(Name)) {
Res.addDecl(IV);
Res.resolveKind();
}
}
}
}
/// Add keywords to the consumer as possible typo corrections.
static void AddKeywordsToConsumer(Sema &SemaRef,
TypoCorrectionConsumer &Consumer,
Scope *S, CorrectionCandidateCallback &CCC,
bool AfterNestedNameSpecifier) {
if (AfterNestedNameSpecifier) {
// For 'X::', we know exactly which keywords can appear next.
Consumer.addKeywordResult("template");
if (CCC.WantExpressionKeywords)
Consumer.addKeywordResult("operator");
return;
}
if (CCC.WantObjCSuper)
Consumer.addKeywordResult("super");
if (CCC.WantTypeSpecifiers) {
// Add type-specifier keywords to the set of results.
static const char *const CTypeSpecs[] = {
"char", "const", "double", "enum", "float", "int", "long", "short",
"signed", "struct", "union", "unsigned", "void", "volatile",
"_Complex",
// storage-specifiers as well
"extern", "inline", "static", "typedef"
};
for (const auto *CTS : CTypeSpecs)
Consumer.addKeywordResult(CTS);
if (SemaRef.getLangOpts().C99 && !SemaRef.getLangOpts().C2y)
Consumer.addKeywordResult("_Imaginary");
if (SemaRef.getLangOpts().C99)
Consumer.addKeywordResult("restrict");
if (SemaRef.getLangOpts().Bool || SemaRef.getLangOpts().CPlusPlus)
Consumer.addKeywordResult("bool");
else if (SemaRef.getLangOpts().C99)
Consumer.addKeywordResult("_Bool");
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("class");
Consumer.addKeywordResult("typename");
Consumer.addKeywordResult("wchar_t");
if (SemaRef.getLangOpts().CPlusPlus11) {
Consumer.addKeywordResult("char16_t");
Consumer.addKeywordResult("char32_t");
Consumer.addKeywordResult("constexpr");
Consumer.addKeywordResult("decltype");
Consumer.addKeywordResult("thread_local");
}
}
if (SemaRef.getLangOpts().GNUKeywords)
Consumer.addKeywordResult("typeof");
} else if (CCC.WantFunctionLikeCasts) {
static const char *const CastableTypeSpecs[] = {
"char", "double", "float", "int", "long", "short",
"signed", "unsigned", "void"
};
for (auto *kw : CastableTypeSpecs)
Consumer.addKeywordResult(kw);
}
if (CCC.WantCXXNamedCasts && SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("const_cast");
Consumer.addKeywordResult("dynamic_cast");
Consumer.addKeywordResult("reinterpret_cast");
Consumer.addKeywordResult("static_cast");
}
if (CCC.WantExpressionKeywords) {
Consumer.addKeywordResult("sizeof");
if (SemaRef.getLangOpts().Bool || SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("false");
Consumer.addKeywordResult("true");
}
if (SemaRef.getLangOpts().CPlusPlus) {
static const char *const CXXExprs[] = {
"delete", "new", "operator", "throw", "typeid"
};
for (const auto *CE : CXXExprs)
Consumer.addKeywordResult(CE);
if (isa<CXXMethodDecl>(SemaRef.CurContext) &&
cast<CXXMethodDecl>(SemaRef.CurContext)->isInstance())
Consumer.addKeywordResult("this");
if (SemaRef.getLangOpts().CPlusPlus11) {
Consumer.addKeywordResult("alignof");
Consumer.addKeywordResult("nullptr");
}
}
if (SemaRef.getLangOpts().C11) {
// FIXME: We should not suggest _Alignof if the alignof macro
// is present.
Consumer.addKeywordResult("_Alignof");
}
}
if (CCC.WantRemainingKeywords) {
if (SemaRef.getCurFunctionOrMethodDecl() || SemaRef.getCurBlock()) {
// Statements.
static const char *const CStmts[] = {
"do", "else", "for", "goto", "if", "return", "switch", "while" };
for (const auto *CS : CStmts)
Consumer.addKeywordResult(CS);
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("catch");
Consumer.addKeywordResult("try");
}
if (S && S->getBreakParent())
Consumer.addKeywordResult("break");
if (S && S->getContinueParent())
Consumer.addKeywordResult("continue");
if (SemaRef.getCurFunction() &&
!SemaRef.getCurFunction()->SwitchStack.empty()) {
Consumer.addKeywordResult("case");
Consumer.addKeywordResult("default");
}
} else {
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("namespace");
Consumer.addKeywordResult("template");
}
if (S && S->isClassScope()) {
Consumer.addKeywordResult("explicit");
Consumer.addKeywordResult("friend");
Consumer.addKeywordResult("mutable");
Consumer.addKeywordResult("private");
Consumer.addKeywordResult("protected");
Consumer.addKeywordResult("public");
Consumer.addKeywordResult("virtual");
}
}
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("using");
if (SemaRef.getLangOpts().CPlusPlus11)
Consumer.addKeywordResult("static_assert");
}
}
}
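// E.g. for the common typo `unsinged x;`, the type-specifier keywords added
// above make `unsigned` a candidate; its edit distance from the typo is 2,
// which falls roughly within the one-third-of-identifier-length threshold
// that CorrectTypo enforces below.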
std::unique_ptr<TypoCorrectionConsumer> Sema::makeTypoCorrectionConsumer(
const DeclarationNameInfo &TypoName, Sema::LookupNameKind LookupKind,
Scope *S, CXXScopeSpec *SS, CorrectionCandidateCallback &CCC,
DeclContext *MemberContext, bool EnteringContext,
const ObjCObjectPointerType *OPT, bool ErrorRecovery) {
if (Diags.hasFatalErrorOccurred() || !getLangOpts().SpellChecking ||
DisableTypoCorrection)
return nullptr;
// In Microsoft mode, don't perform typo correction in a template member
// function dependent context because it interferes with the "lookup into
// dependent bases of class templates" feature.
if (getLangOpts().MSVCCompat && CurContext->isDependentContext() &&
isa<CXXMethodDecl>(CurContext))
return nullptr;
// We only attempt to correct typos for identifiers.
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
if (!Typo)
return nullptr;
// If the scope specifier itself was invalid, don't try to correct
// typos.
if (SS && SS->isInvalid())
return nullptr;
// Never try to correct typos during any kind of code synthesis.
if (!CodeSynthesisContexts.empty())
return nullptr;
// Don't try to correct 'super'.
if (S && S->isInObjcMethodScope() && Typo == getSuperIdentifier())
return nullptr;
// Abort if typo correction already failed for this specific typo.
IdentifierSourceLocations::iterator locs = TypoCorrectionFailures.find(Typo);
if (locs != TypoCorrectionFailures.end() &&
locs->second.count(TypoName.getLoc()))
return nullptr;
// Don't try to correct the identifier "vector" when in AltiVec mode.
// TODO: Figure out why typo correction misbehaves in this case, fix it, and
// remove this workaround.
if ((getLangOpts().AltiVec || getLangOpts().ZVector) && Typo->isStr("vector"))
return nullptr;
// Provide a stopgap for files that are just seriously broken. Trying
// to correct all typos can turn into a HUGE performance penalty, causing
// some files to take minutes to get rejected by the parser.
unsigned Limit = getDiagnostics().getDiagnosticOptions().SpellCheckingLimit;
if (Limit && TyposCorrected >= Limit)
return nullptr;
++TyposCorrected;
// If we're handling a missing symbol error, using modules, and the
// special search all modules option is used, look for a missing import.
if (ErrorRecovery && getLangOpts().Modules &&
getLangOpts().ModulesSearchAll) {
// The following has the side effect of loading the missing module.
getModuleLoader().lookupMissingImports(Typo->getName(),
TypoName.getBeginLoc());
}
// Extend the lifetime of the callback. We delayed this until here
// to avoid allocations in the hot path (which is where no typo correction
// occurs). Note that CorrectionCandidateCallback is polymorphic and
// initially stack-allocated.
std::unique_ptr<CorrectionCandidateCallback> ClonedCCC = CCC.clone();
auto Consumer = std::make_unique<TypoCorrectionConsumer>(
*this, TypoName, LookupKind, S, SS, std::move(ClonedCCC), MemberContext,
EnteringContext);
// Perform name lookup to find visible, similarly-named entities.
bool IsUnqualifiedLookup = false;
DeclContext *QualifiedDC = MemberContext;
if (MemberContext) {
LookupVisibleDecls(MemberContext, LookupKind, *Consumer);
// Look in qualified interfaces.
if (OPT) {
for (auto *I : OPT->quals())
LookupVisibleDecls(I, LookupKind, *Consumer);
}
} else if (SS && SS->isSet()) {
QualifiedDC = computeDeclContext(*SS, EnteringContext);
if (!QualifiedDC)
return nullptr;
LookupVisibleDecls(QualifiedDC, LookupKind, *Consumer);
} else {
IsUnqualifiedLookup = true;
}
// Determine whether we are going to search in the various namespaces for
// corrections.
bool SearchNamespaces
= getLangOpts().CPlusPlus &&
(IsUnqualifiedLookup || (SS && SS->isSet()));
if (IsUnqualifiedLookup || SearchNamespaces) {
// For unqualified lookup, look through all of the names that we have
// seen in this translation unit.
// FIXME: Re-add the ability to skip very unlikely potential corrections.
for (const auto &I : Context.Idents)
Consumer->FoundName(I.getKey());
// Walk through identifiers in external identifier sources.
// FIXME: Re-add the ability to skip very unlikely potential corrections.
if (IdentifierInfoLookup *External
= Context.Idents.getExternalIdentifierLookup()) {
std::unique_ptr<IdentifierIterator> Iter(External->getIdentifiers());
do {
StringRef Name = Iter->Next();
if (Name.empty())
break;
Consumer->FoundName(Name);
} while (true);
}
}
AddKeywordsToConsumer(*this, *Consumer, S,
*Consumer->getCorrectionValidator(),
SS && SS->isNotEmpty());
// Build the NestedNameSpecifiers for the KnownNamespaces, if we're going
// to search those namespaces.
if (SearchNamespaces) {
// Load any externally-known namespaces.
if (ExternalSource && !LoadedExternalKnownNamespaces) {
SmallVector<NamespaceDecl *, 4> ExternalKnownNamespaces;
LoadedExternalKnownNamespaces = true;
ExternalSource->ReadKnownNamespaces(ExternalKnownNamespaces);
for (auto *N : ExternalKnownNamespaces)
KnownNamespaces[N] = true;
}
Consumer->addNamespaces(KnownNamespaces);
}
return Consumer;
}
TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
Sema::LookupNameKind LookupKind,
Scope *S, CXXScopeSpec *SS,
CorrectionCandidateCallback &CCC,
CorrectTypoKind Mode,
DeclContext *MemberContext,
bool EnteringContext,
const ObjCObjectPointerType *OPT,
bool RecordFailure) {
// Always let the ExternalSource have the first chance at correction, even
// if we would otherwise have given up.
if (ExternalSource) {
if (TypoCorrection Correction =
ExternalSource->CorrectTypo(TypoName, LookupKind, S, SS, CCC,
MemberContext, EnteringContext, OPT))
return Correction;
}
// Ugly hack equivalent to CTC == CTC_ObjCMessageReceiver;
// WantObjCSuper is only true for CTC_ObjCMessageReceiver and for
// some instances of CTC_Unknown, while WantRemainingKeywords is true
// for CTC_Unknown but not for CTC_ObjCMessageReceiver.
bool ObjCMessageReceiver = CCC.WantObjCSuper && !CCC.WantRemainingKeywords;
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
auto Consumer = makeTypoCorrectionConsumer(TypoName, LookupKind, S, SS, CCC,
MemberContext, EnteringContext,
OPT, Mode == CTK_ErrorRecovery);
if (!Consumer)
return TypoCorrection();
// If we haven't found anything, we're done.
if (Consumer->empty())
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
// Make sure the best edit distance (prior to adding any namespace qualifiers)
// is not more than about a third of the length of the typo's identifier.
unsigned ED = Consumer->getBestEditDistance(true);
unsigned TypoLen = Typo->getName().size();
if (ED > 0 && TypoLen / ED < 3)
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
TypoCorrection BestTC = Consumer->getNextCorrection();
TypoCorrection SecondBestTC = Consumer->getNextCorrection();
if (!BestTC)
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
ED = BestTC.getEditDistance();
if (TypoLen >= 3 && ED > 0 && TypoLen / ED < 3) {
// If this was an unqualified lookup and we believe the callback
// object wouldn't have filtered out possible corrections, note
// that no correction was found.
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
}
// If only a single name remains, return that result.
if (!SecondBestTC ||
SecondBestTC.getEditDistance(false) > BestTC.getEditDistance(false)) {
const TypoCorrection &Result = BestTC;
// Don't correct to a keyword that's the same as the typo; the keyword
// wasn't actually in scope.
if (ED == 0 && Result.isKeyword())
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
TypoCorrection TC = Result;
TC.setCorrectionRange(SS, TypoName);
checkCorrectionVisibility(*this, TC);
return TC;
} else if (SecondBestTC && ObjCMessageReceiver) {
// Prefer 'super' when we're completing in a message-receiver
// context.
if (BestTC.getCorrection().getAsString() != "super") {
if (SecondBestTC.getCorrection().getAsString() == "super")
BestTC = SecondBestTC;
else if ((*Consumer)["super"].front().isKeyword())
BestTC = (*Consumer)["super"].front();
}
// Don't correct to a keyword that's the same as the typo; the keyword
// wasn't actually in scope.
if (BestTC.getEditDistance() == 0 ||
BestTC.getCorrection().getAsString() != "super")
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
BestTC.setCorrectionRange(SS, TypoName);
return BestTC;
}
// Record the failure's location if needed and return an empty correction. If
// this was an unqualified lookup and we believe the callback object did not
// filter out possible corrections, also cache the failure for the typo.
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure && !SecondBestTC);
}
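// Note on the ranking above, roughly: candidates come back best-first, so a
// strictly better best match wins outright; a tie with the second-best
// candidate is only broken in the ObjC message-receiver case, where `super`
// is preferred, and otherwise the correction attempt fails rather than guess.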
TypoExpr *Sema::CorrectTypoDelayed(
const DeclarationNameInfo &TypoName, Sema::LookupNameKind LookupKind,
Scope *S, CXXScopeSpec *SS, CorrectionCandidateCallback &CCC,
TypoDiagnosticGenerator TDG, TypoRecoveryCallback TRC, CorrectTypoKind Mode,
DeclContext *MemberContext, bool EnteringContext,
const ObjCObjectPointerType *OPT) {
auto Consumer = makeTypoCorrectionConsumer(TypoName, LookupKind, S, SS, CCC,
MemberContext, EnteringContext,
OPT, Mode == CTK_ErrorRecovery);
// Give the external sema source a chance to correct the typo.
TypoCorrection ExternalTypo;
if (ExternalSource && Consumer) {
ExternalTypo = ExternalSource->CorrectTypo(
TypoName, LookupKind, S, SS, *Consumer->getCorrectionValidator(),
MemberContext, EnteringContext, OPT);
if (ExternalTypo)
Consumer->addCorrection(ExternalTypo);
}
if (!Consumer || Consumer->empty())
return nullptr;
// Make sure the best edit distance (prior to adding any namespace qualifiers)
// is not more than about a third of the length of the typo's identifier.
unsigned ED = Consumer->getBestEditDistance(true);
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
if (!ExternalTypo && ED > 0 && Typo->getName().size() / ED < 3)
return nullptr;
ExprEvalContexts.back().NumTypos++;
return createDelayedTypo(std::move(Consumer), std::move(TDG), std::move(TRC),
TypoName.getLoc());
}
void TypoCorrection::addCorrectionDecl(NamedDecl *CDecl) {
if (!CDecl) return;
if (isKeyword())
CorrectionDecls.clear();
CorrectionDecls.push_back(CDecl);
if (!CorrectionName)
CorrectionName = CDecl->getDeclName();
}
std::string TypoCorrection::getAsString(const LangOptions &LO) const {
if (CorrectionNameSpec) {
std::string tmpBuffer;
llvm::raw_string_ostream PrefixOStream(tmpBuffer);
CorrectionNameSpec->print(PrefixOStream, PrintingPolicy(LO));
PrefixOStream << CorrectionName;
return PrefixOStream.str();
}
return CorrectionName.getAsString();
}
bool CorrectionCandidateCallback::ValidateCandidate(
const TypoCorrection &candidate) {
if (!candidate.isResolved())
return true;
if (candidate.isKeyword())
return WantTypeSpecifiers || WantExpressionKeywords || WantCXXNamedCasts ||
WantRemainingKeywords || WantObjCSuper;
bool HasNonType = false;
bool HasStaticMethod = false;
bool HasNonStaticMethod = false;
for (Decl *D : candidate) {
if (FunctionTemplateDecl *FTD = dyn_cast<FunctionTemplateDecl>(D))
D = FTD->getTemplatedDecl();
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
if (Method->isStatic())
HasStaticMethod = true;
else
HasNonStaticMethod = true;
}
if (!isa<TypeDecl>(D))
HasNonType = true;
}
if (IsAddressOfOperand && HasNonStaticMethod && !HasStaticMethod &&
!candidate.getCorrectionSpecifier())
return false;
return WantTypeSpecifiers || HasNonType;
}
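// E.g. a candidate that resolves only to non-static member functions is
// rejected when the typo is the operand of `&` and no qualifier was written,
// since forming a pointer to a non-static member requires the qualified
// `&Class::method` form.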
FunctionCallFilterCCC::FunctionCallFilterCCC(Sema &SemaRef, unsigned NumArgs,
bool HasExplicitTemplateArgs,
MemberExpr *ME)
: NumArgs(NumArgs), HasExplicitTemplateArgs(HasExplicitTemplateArgs),
CurContext(SemaRef.CurContext), MemberFn(ME) {
WantTypeSpecifiers = false;
WantFunctionLikeCasts = SemaRef.getLangOpts().CPlusPlus &&
!HasExplicitTemplateArgs && NumArgs == 1;
WantCXXNamedCasts = HasExplicitTemplateArgs && NumArgs == 1;
WantRemainingKeywords = false;
}
bool FunctionCallFilterCCC::ValidateCandidate(const TypoCorrection &candidate) {
if (!candidate.getCorrectionDecl())
return candidate.isKeyword();
for (auto *C : candidate) {
FunctionDecl *FD = nullptr;
NamedDecl *ND = C->getUnderlyingDecl();
if (FunctionTemplateDecl *FTD = dyn_cast<FunctionTemplateDecl>(ND))
FD = FTD->getTemplatedDecl();
if (!HasExplicitTemplateArgs && !FD) {
if (!(FD = dyn_cast<FunctionDecl>(ND)) && isa<ValueDecl>(ND)) {
// If the Decl is neither a function nor a template function,
// determine if it is a pointer or reference to a function. If so,
// check against the number of arguments expected for the pointee.
QualType ValType = cast<ValueDecl>(ND)->getType();
if (ValType.isNull())
continue;
if (ValType->isAnyPointerType() || ValType->isReferenceType())
ValType = ValType->getPointeeType();
if (const FunctionProtoType *FPT = ValType->getAs<FunctionProtoType>())
if (FPT->getNumParams() == NumArgs)
return true;
}
}
// A typo for a function-style cast can look like a function call in C++.
if ((HasExplicitTemplateArgs ? getAsTypeTemplateDecl(ND) != nullptr
: isa<TypeDecl>(ND)) &&
CurContext->getParentASTContext().getLangOpts().CPlusPlus)
// Only a class or class template can take two or more arguments.
return NumArgs <= 1 || HasExplicitTemplateArgs || isa<CXXRecordDecl>(ND);
// Skip the current candidate if it is not a FunctionDecl or does not accept
// the current number of arguments.
if (!FD || !(FD->getNumParams() >= NumArgs &&
FD->getMinRequiredArguments() <= NumArgs))
continue;
// If the current candidate is a non-static C++ method, skip the candidate
// unless the method being corrected (or the current DeclContext, if the
// function being corrected is not a method) is a method in the same class
// as, or in a class derived from, the candidate's parent class.
if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
if (MemberFn || !MD->isStatic()) {
const auto *CurMD =
MemberFn
? dyn_cast_if_present<CXXMethodDecl>(MemberFn->getMemberDecl())
: dyn_cast_if_present<CXXMethodDecl>(CurContext);
const CXXRecordDecl *CurRD =
CurMD ? CurMD->getParent()->getCanonicalDecl() : nullptr;
const CXXRecordDecl *RD = MD->getParent()->getCanonicalDecl();
if (!CurRD || (CurRD != RD && !CurRD->isDerivedFrom(RD)))
continue;
}
}
return true;
}
return false;
}
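// Sketch of the filtering above: for a call written as `fo(1, 2)`, a
// candidate `foo` declared as `void foo(int, int = 0)` is kept, because it
// has at least 2 parameters and requires at most 2 arguments, while a
// candidate taking a single int is skipped.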
void Sema::diagnoseTypo(const TypoCorrection &Correction,
const PartialDiagnostic &TypoDiag,
bool ErrorRecovery) {
diagnoseTypo(Correction, TypoDiag, PDiag(diag::note_previous_decl),
ErrorRecovery);
}
/// Find which declaration we should import to provide the definition of
/// the given declaration.
static const NamedDecl *getDefinitionToImport(const NamedDecl *D) {
if (const auto *VD = dyn_cast<VarDecl>(D))
return VD->getDefinition();
if (const auto *FD = dyn_cast<FunctionDecl>(D))
return FD->getDefinition();
if (const auto *TD = dyn_cast<TagDecl>(D))
return TD->getDefinition();
if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(D))
return ID->getDefinition();
if (const auto *PD = dyn_cast<ObjCProtocolDecl>(D))
return PD->getDefinition();
if (const auto *TD = dyn_cast<TemplateDecl>(D))
if (const NamedDecl *TTD = TD->getTemplatedDecl())
return getDefinitionToImport(TTD);
return nullptr;
}
void Sema::diagnoseMissingImport(SourceLocation Loc, const NamedDecl *Decl,
MissingImportKind MIK, bool Recover) {
// Suggest importing a module providing the definition of this entity, if
// possible.
const NamedDecl *Def = getDefinitionToImport(Decl);
if (!Def)
Def = Decl;
Module *Owner = getOwningModule(Def);
assert(Owner && "definition of hidden declaration is not in a module");
llvm::SmallVector<Module*, 8> OwningModules;
OwningModules.push_back(Owner);
auto Merged = Context.getModulesWithMergedDefinition(Def);
OwningModules.insert(OwningModules.end(), Merged.begin(), Merged.end());
diagnoseMissingImport(Loc, Def, Def->getLocation(), OwningModules, MIK,
Recover);
}
/// Get a "quoted.h" or <angled.h> include path to use in a diagnostic
/// suggesting the addition of a #include of the specified file.
static std::string getHeaderNameForHeader(Preprocessor &PP, FileEntryRef E,
llvm::StringRef IncludingFile) {
bool IsAngled = false;
auto Path = PP.getHeaderSearchInfo().suggestPathToFileForDiagnostics(
E, IncludingFile, &IsAngled);
return (IsAngled ? '<' : '"') + Path + (IsAngled ? '>' : '"');
}
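// For example, this yields `<vector>` for a header found on an angled search
// path and `"mylib.h"` (a hypothetical project-local header) for one that
// should be included with quotes, matching how the user would be expected
// to spell the #include.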
void Sema::diagnoseMissingImport(SourceLocation UseLoc, const NamedDecl *Decl,
SourceLocation DeclLoc,
ArrayRef<Module *> Modules,
MissingImportKind MIK, bool Recover) {
assert(!Modules.empty());
// See https://github.com/llvm/llvm-project/issues/73893. It is generally
// more confusing than helpful to show that the namespace is not visible.
if (isa<NamespaceDecl>(Decl))
return;
auto NotePrevious = [&] {
// FIXME: Suppress the note backtrace even under
// -fdiagnostics-show-note-include-stack. We don't care how this
// declaration was previously reached.
Diag(DeclLoc, diag::note_unreachable_entity) << (int)MIK;
};
// Weed out duplicates from module list.
llvm::SmallVector<Module*, 8> UniqueModules;
llvm::SmallDenseSet<Module*, 8> UniqueModuleSet;
for (auto *M : Modules) {
if (M->isExplicitGlobalModule() || M->isPrivateModule())
continue;
if (UniqueModuleSet.insert(M).second)
UniqueModules.push_back(M);
}
// Try to find a suitable header-name to #include.
std::string HeaderName;
if (OptionalFileEntryRef Header =
PP.getHeaderToIncludeForDiagnostics(UseLoc, DeclLoc)) {
if (const FileEntry *FE =
SourceMgr.getFileEntryForID(SourceMgr.getFileID(UseLoc)))
HeaderName =
getHeaderNameForHeader(PP, *Header, FE->tryGetRealPathName());
}
// If we have a #include we should suggest, or if all definition locations
// were in global module fragments, don't suggest an import.
if (!HeaderName.empty() || UniqueModules.empty()) {
// FIXME: Find a smart place to suggest inserting a #include, and add
// a FixItHint there.
Diag(UseLoc, diag::err_module_unimported_use_header)
<< (int)MIK << Decl << !HeaderName.empty() << HeaderName;
// Produce a note showing where the entity was declared.
NotePrevious();
if (Recover)
createImplicitModuleImportForErrorRecovery(UseLoc, Modules[0]);
return;
}
Modules = UniqueModules;
auto GetModuleNameForDiagnostic = [this](const Module *M) -> std::string {
if (M->isModuleMapModule())
return M->getFullModuleName();
if (M->isImplicitGlobalModule())
M = M->getTopLevelModule();
// If the current module unit is in the same module as M, it is OK to show
// the partition name. Otherwise, it is sufficient to show the primary
// module name.
if (getASTContext().isInSameModule(M, getCurrentModule()))
return M->getTopLevelModuleName().str();
else
return M->getPrimaryModuleInterfaceName().str();
};
if (Modules.size() > 1) {
std::string ModuleList;
unsigned N = 0;
for (const auto *M : Modules) {
ModuleList += "\n ";
if (++N == 5 && N != Modules.size()) {
ModuleList += "[...]";
break;
}
ModuleList += GetModuleNameForDiagnostic(M);
}
Diag(UseLoc, diag::err_module_unimported_use_multiple)
<< (int)MIK << Decl << ModuleList;
} else {
// FIXME: Add a FixItHint that imports the corresponding module.
Diag(UseLoc, diag::err_module_unimported_use)
<< (int)MIK << Decl << GetModuleNameForDiagnostic(Modules[0]);
}
NotePrevious();
// Try to recover by implicitly importing this module.
if (Recover)
createImplicitModuleImportForErrorRecovery(UseLoc, Modules[0]);
}
void Sema::diagnoseTypo(const TypoCorrection &Correction,
const PartialDiagnostic &TypoDiag,
const PartialDiagnostic &PrevNote,
bool ErrorRecovery) {
std::string CorrectedStr = Correction.getAsString(getLangOpts());
std::string CorrectedQuotedStr = Correction.getQuoted(getLangOpts());
FixItHint FixTypo = FixItHint::CreateReplacement(
Correction.getCorrectionRange(), CorrectedStr);
// Maybe we're just missing a module import.
if (Correction.requiresImport()) {
NamedDecl *Decl = Correction.getFoundDecl();
assert(Decl && "import required but no declaration to import");
diagnoseMissingImport(Correction.getCorrectionRange().getBegin(), Decl,
MissingImportKind::Declaration, ErrorRecovery);
return;
}
Diag(Correction.getCorrectionRange().getBegin(), TypoDiag)
<< CorrectedQuotedStr << (ErrorRecovery ? FixTypo : FixItHint());
NamedDecl *ChosenDecl =
Correction.isKeyword() ? nullptr : Correction.getFoundDecl();
// For builtin functions which aren't declared anywhere in source,
// don't emit the "declared here" note.
if (const auto *FD = dyn_cast_if_present<FunctionDecl>(ChosenDecl);
FD && FD->getBuiltinID() &&
PrevNote.getDiagID() == diag::note_previous_decl &&
Correction.getCorrectionRange().getBegin() == FD->getBeginLoc()) {
ChosenDecl = nullptr;
}
if (PrevNote.getDiagID() && ChosenDecl)
Diag(ChosenDecl->getLocation(), PrevNote)
<< CorrectedQuotedStr << (ErrorRecovery ? FixItHint() : FixTypo);
// Add any extra diagnostics.
for (const PartialDiagnostic &PD : Correction.getExtraDiagnostics())
Diag(Correction.getCorrectionRange().getBegin(), PD);
}
TypoExpr *Sema::createDelayedTypo(std::unique_ptr<TypoCorrectionConsumer> TCC,
TypoDiagnosticGenerator TDG,
TypoRecoveryCallback TRC,
SourceLocation TypoLoc) {
assert(TCC && "createDelayedTypo requires a valid TypoCorrectionConsumer");
auto TE = new (Context) TypoExpr(Context.DependentTy, TypoLoc);
auto &State = DelayedTypos[TE];
State.Consumer = std::move(TCC);
State.DiagHandler = std::move(TDG);
State.RecoveryHandler = std::move(TRC);
if (TE)
TypoExprs.push_back(TE);
return TE;
}
const Sema::TypoExprState &Sema::getTypoExprState(TypoExpr *TE) const {
auto Entry = DelayedTypos.find(TE);
assert(Entry != DelayedTypos.end() &&
"Failed to get the state for a TypoExpr!");
return Entry->second;
}
void Sema::clearDelayedTypo(TypoExpr *TE) {
DelayedTypos.erase(TE);
}
void Sema::ActOnPragmaDump(Scope *S, SourceLocation IILoc, IdentifierInfo *II) {
DeclarationNameInfo Name(II, IILoc);
LookupResult R(*this, Name, LookupAnyName,
RedeclarationKind::NotForRedeclaration);
R.suppressDiagnostics();
R.setHideTags(false);
LookupName(R, S);
R.dump();
}
void Sema::ActOnPragmaDump(Expr *E) {
E->dump();
}
RedeclarationKind Sema::forRedeclarationInCurContext() const {
// A declaration with an owning module for linkage can never link against
// anything that is not visible. We don't need to check linkage here; if
// the context has internal linkage, redeclaration lookup won't find things
// from other TUs, and we can't safely compute linkage yet in general.
if (cast<Decl>(CurContext)->getOwningModuleForLinkage())
return RedeclarationKind::ForVisibleRedeclaration;
return RedeclarationKind::ForExternalRedeclaration;
}
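// E.g. a declaration owned by a named C++20 module for linkage purposes can
// only redeclare entities that are visible, so visible-redeclaration lookup
// suffices; otherwise redeclaration lookup must also consider external
// declarations from other translation units.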
diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 62a240ecbc60..c11468a08ae5 100644
--- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1,3968 +1,3965 @@
//===- ExprEngine.cpp - Path-Sensitive Expression-Level Dataflow ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a meta-engine for path-sensitive dataflow analysis that
// is built on CoreEngine, but provides the boilerplate to execute transfer
// functions and build the ExplodedGraph at the expression level.
//
//===----------------------------------------------------------------------===//
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "PrettyStackTraceLocationContext.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ParentMap.h"
#include "clang/AST/PrettyPrinter.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtObjC.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/ConstructionContext.h"
#include "clang/Analysis/ProgramPoint.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/JsonSupport.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/LoopUnrolling.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/LoopWidening.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace clang;
using namespace ento;
#define DEBUG_TYPE "ExprEngine"
STATISTIC(NumRemoveDeadBindings,
"The # of times RemoveDeadBindings is called");
STATISTIC(NumMaxBlockCountReached,
"The # of aborted paths due to reaching the maximum block count in "
"a top level function");
STATISTIC(NumMaxBlockCountReachedInInlined,
"The # of aborted paths due to reaching the maximum block count in "
"an inlined function");
STATISTIC(NumTimesRetriedWithoutInlining,
"The # of times we re-evaluated a call without inlining");
//===----------------------------------------------------------------------===//
// Internal program state traits.
//===----------------------------------------------------------------------===//
namespace {
// When modeling a C++ constructor, for a variety of reasons we need to track
// the location of the object for the duration of its ConstructionContext.
// ObjectsUnderConstruction maps statements within the construction context
// to the object's location, so that at every such statement the location
// can be retrieved.
/// ConstructedObjectKey is used for being able to find the path-sensitive
/// memory region of a freshly constructed object while modeling the AST node
/// that syntactically represents the object that is being constructed.
/// Semantics of such nodes may sometimes require access to the region that's
/// not otherwise present in the program state, or to the very fact that
/// the construction context was present and contained references to these
/// AST nodes.
class ConstructedObjectKey {
using ConstructedObjectKeyImpl =
std::pair<ConstructionContextItem, const LocationContext *>;
const ConstructedObjectKeyImpl Impl;
public:
explicit ConstructedObjectKey(const ConstructionContextItem &Item,
const LocationContext *LC)
: Impl(Item, LC) {}
const ConstructionContextItem &getItem() const { return Impl.first; }
const LocationContext *getLocationContext() const { return Impl.second; }
ASTContext &getASTContext() const {
return getLocationContext()->getDecl()->getASTContext();
}
void printJson(llvm::raw_ostream &Out, PrinterHelper *Helper,
PrintingPolicy &PP) const {
const Stmt *S = getItem().getStmtOrNull();
const CXXCtorInitializer *I = nullptr;
if (!S)
I = getItem().getCXXCtorInitializer();
if (S)
Out << "\"stmt_id\": " << S->getID(getASTContext());
else
Out << "\"init_id\": " << I->getID(getASTContext());
// Kind
Out << ", \"kind\": \"" << getItem().getKindAsString()
<< "\", \"argument_index\": ";
if (getItem().getKind() == ConstructionContextItem::ArgumentKind)
Out << getItem().getIndex();
else
Out << "null";
// Pretty-print
Out << ", \"pretty\": ";
if (S) {
S->printJson(Out, Helper, PP, /*AddQuotes=*/true);
} else {
Out << '\"' << I->getAnyMember()->getDeclName() << '\"';
}
}
void Profile(llvm::FoldingSetNodeID &ID) const {
ID.Add(Impl.first);
ID.AddPointer(Impl.second);
}
bool operator==(const ConstructedObjectKey &RHS) const {
return Impl == RHS.Impl;
}
bool operator<(const ConstructedObjectKey &RHS) const {
return Impl < RHS.Impl;
}
};
} // namespace
typedef llvm::ImmutableMap<ConstructedObjectKey, SVal>
ObjectsUnderConstructionMap;
REGISTER_TRAIT_WITH_PROGRAMSTATE(ObjectsUnderConstruction,
ObjectsUnderConstructionMap)
// This trait is responsible for storing the index of the element that is to be
// constructed in the next iteration. As a result, a CXXConstructExpr is only
// stored if it has array type. The index is an index into the contiguous
// memory region, which is important for multidimensional arrays. E.g.: given
// int arr[2][2]; if arr[1][1] will be the next element under construction,
// the index is 3.
typedef llvm::ImmutableMap<
std::pair<const CXXConstructExpr *, const LocationContext *>, unsigned>
IndexOfElementToConstructMap;
REGISTER_TRAIT_WITH_PROGRAMSTATE(IndexOfElementToConstruct,
IndexOfElementToConstructMap)
// This trait is responsible for holding our pending ArrayInitLoopExprs.
// It pairs the LocationContext and the initializer CXXConstructExpr with
// the size of the array that's being copy initialized.
typedef llvm::ImmutableMap<
std::pair<const CXXConstructExpr *, const LocationContext *>, unsigned>
PendingInitLoopMap;
REGISTER_TRAIT_WITH_PROGRAMSTATE(PendingInitLoop, PendingInitLoopMap)
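// E.g. when a lambda captures an array `S arr[4]` by copy, the per-element
// copies are modeled via an ArrayInitLoopExpr; this trait remembers the
// flattened size (4) for the given construct-expression and frame while
// those copies are pending.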
typedef llvm::ImmutableMap<const LocationContext *, unsigned>
PendingArrayDestructionMap;
REGISTER_TRAIT_WITH_PROGRAMSTATE(PendingArrayDestruction,
PendingArrayDestructionMap)
//===----------------------------------------------------------------------===//
// Engine construction and deletion.
//===----------------------------------------------------------------------===//
static const char* TagProviderName = "ExprEngine";
ExprEngine::ExprEngine(cross_tu::CrossTranslationUnitContext &CTU,
AnalysisManager &mgr, SetOfConstDecls *VisitedCalleesIn,
FunctionSummariesTy *FS, InliningModes HowToInlineIn)
: CTU(CTU), IsCTUEnabled(mgr.getAnalyzerOptions().IsNaiveCTUEnabled),
AMgr(mgr), AnalysisDeclContexts(mgr.getAnalysisDeclContextManager()),
Engine(*this, FS, mgr.getAnalyzerOptions()), G(Engine.getGraph()),
StateMgr(getContext(), mgr.getStoreManagerCreator(),
mgr.getConstraintManagerCreator(), G.getAllocator(), this),
SymMgr(StateMgr.getSymbolManager()), MRMgr(StateMgr.getRegionManager()),
svalBuilder(StateMgr.getSValBuilder()), ObjCNoRet(mgr.getASTContext()),
BR(mgr, *this), VisitedCallees(VisitedCalleesIn),
HowToInline(HowToInlineIn) {
unsigned TrimInterval = mgr.options.GraphTrimInterval;
if (TrimInterval != 0) {
// Enable eager node reclamation when constructing the ExplodedGraph.
G.enableNodeReclamation(TrimInterval);
}
}
//===----------------------------------------------------------------------===//
// Utility methods.
//===----------------------------------------------------------------------===//
ProgramStateRef ExprEngine::getInitialState(const LocationContext *InitLoc) {
ProgramStateRef state = StateMgr.getInitialState(InitLoc);
const Decl *D = InitLoc->getDecl();
// Preconditions.
// FIXME: It would be nice if we had a more general mechanism to add
// such preconditions. Some day.
do {
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// Precondition: the first argument of 'main' is an integer guaranteed
// to be > 0.
const IdentifierInfo *II = FD->getIdentifier();
if (!II || !(II->getName() == "main" && FD->getNumParams() > 0))
break;
const ParmVarDecl *PD = FD->getParamDecl(0);
QualType T = PD->getType();
const auto *BT = dyn_cast<BuiltinType>(T);
if (!BT || !BT->isInteger())
break;
const MemRegion *R = state->getRegion(PD, InitLoc);
if (!R)
break;
SVal V = state->getSVal(loc::MemRegionVal(R));
SVal Constraint_untested = evalBinOp(state, BO_GT, V,
svalBuilder.makeZeroVal(T),
svalBuilder.getConditionType());
std::optional<DefinedOrUnknownSVal> Constraint =
Constraint_untested.getAs<DefinedOrUnknownSVal>();
if (!Constraint)
break;
if (ProgramStateRef newState = state->assume(*Constraint, true))
state = newState;
}
break;
}
while (false);
if (const auto *MD = dyn_cast<ObjCMethodDecl>(D)) {
// Precondition: 'self' is always non-null upon entry to an Objective-C
// method.
const ImplicitParamDecl *SelfD = MD->getSelfDecl();
const MemRegion *R = state->getRegion(SelfD, InitLoc);
SVal V = state->getSVal(loc::MemRegionVal(R));
if (std::optional<Loc> LV = V.getAs<Loc>()) {
// Assume that the pointer value in 'self' is non-null.
state = state->assume(*LV, true);
assert(state && "'self' cannot be null");
}
}
if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
if (MD->isImplicitObjectMemberFunction()) {
// Precondition: 'this' is always non-null upon entry to the
// top-level function. This is our starting assumption for
// analyzing an "open" program.
const StackFrameContext *SFC = InitLoc->getStackFrame();
if (SFC->getParent() == nullptr) {
loc::MemRegionVal L = svalBuilder.getCXXThis(MD, SFC);
SVal V = state->getSVal(L);
if (std::optional<Loc> LV = V.getAs<Loc>()) {
state = state->assume(*LV, true);
assert(state && "'this' cannot be null");
}
}
}
}
return state;
}
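// Example of the preconditions above: when analyzing
// `int main(int argc, char **argv)`, the initial state assumes argc > 0;
// similarly, `self` is assumed non-null on entry to an ObjC method, and
// `this` is assumed non-null on entry to a top-level C++ member function.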
ProgramStateRef ExprEngine::createTemporaryRegionIfNeeded(
ProgramStateRef State, const LocationContext *LC,
const Expr *InitWithAdjustments, const Expr *Result,
const SubRegion **OutRegionWithAdjustments) {
// FIXME: This function is a hack that works around the quirky AST
// we're often having with respect to C++ temporaries. If only we modelled
// the actual execution order of statements properly in the CFG,
// all the hassle with adjustments would not be necessary,
// and perhaps the whole function would be removed.
SVal InitValWithAdjustments = State->getSVal(InitWithAdjustments, LC);
if (!Result) {
// If we don't have an explicit result expression, we're in "if needed"
// mode. Only create a region if the current value is a NonLoc.
if (!isa<NonLoc>(InitValWithAdjustments)) {
if (OutRegionWithAdjustments)
*OutRegionWithAdjustments = nullptr;
return State;
}
Result = InitWithAdjustments;
} else {
// We need to create a region no matter what. Make sure we don't try to
// stuff a Loc into a non-pointer temporary region.
assert(!isa<Loc>(InitValWithAdjustments) ||
Loc::isLocType(Result->getType()) ||
Result->getType()->isMemberPointerType());
}
ProgramStateManager &StateMgr = State->getStateManager();
MemRegionManager &MRMgr = StateMgr.getRegionManager();
StoreManager &StoreMgr = StateMgr.getStoreManager();
// MaterializeTemporaryExpr may appear out of place, after a few field and
// base-class accesses have been made to the object, even though semantically
// it is the whole object that gets materialized and lifetime-extended.
//
// For example:
//
// `-MaterializeTemporaryExpr
// `-MemberExpr
// `-CXXTemporaryObjectExpr
//
// instead of the more natural
//
// `-MemberExpr
// `-MaterializeTemporaryExpr
// `-CXXTemporaryObjectExpr
//
// Use the usual methods for obtaining the expression of the base object,
// and record the adjustments that we need to make to obtain the sub-object
// that the whole expression 'Ex' refers to. This trick is common;
// CodeGen takes a similar route.
SmallVector<const Expr *, 2> CommaLHSs;
SmallVector<SubobjectAdjustment, 2> Adjustments;
const Expr *Init = InitWithAdjustments->skipRValueSubobjectAdjustments(
CommaLHSs, Adjustments);
// Take the region for Init, i.e. for the whole object. If we do not remember
// the region in which the object originally was constructed, come up with
// a new temporary region out of thin air and copy the contents of the object
// (which are currently present in the Environment, because Init is an rvalue)
// into that region. This is not correct, but it is better than nothing.
const TypedValueRegion *TR = nullptr;
if (const auto *MT = dyn_cast<MaterializeTemporaryExpr>(Result)) {
if (std::optional<SVal> V = getObjectUnderConstruction(State, MT, LC)) {
State = finishObjectConstruction(State, MT, LC);
State = State->BindExpr(Result, LC, *V);
return State;
} else if (const ValueDecl *VD = MT->getExtendingDecl()) {
StorageDuration SD = MT->getStorageDuration();
assert(SD != SD_FullExpression);
// If this object is bound to a reference with static storage duration, we
// put it in a different region to prevent "address leakage" warnings.
if (SD == SD_Static || SD == SD_Thread) {
TR = MRMgr.getCXXStaticLifetimeExtendedObjectRegion(Init, VD);
} else {
TR = MRMgr.getCXXLifetimeExtendedObjectRegion(Init, VD, LC);
}
} else {
assert(MT->getStorageDuration() == SD_FullExpression);
TR = MRMgr.getCXXTempObjectRegion(Init, LC);
}
} else {
TR = MRMgr.getCXXTempObjectRegion(Init, LC);
}
SVal Reg = loc::MemRegionVal(TR);
SVal BaseReg = Reg;
// Make the necessary adjustments to obtain the sub-object.
for (const SubobjectAdjustment &Adj : llvm::reverse(Adjustments)) {
switch (Adj.Kind) {
case SubobjectAdjustment::DerivedToBaseAdjustment:
Reg = StoreMgr.evalDerivedToBase(Reg, Adj.DerivedToBase.BasePath);
break;
case SubobjectAdjustment::FieldAdjustment:
Reg = StoreMgr.getLValueField(Adj.Field, Reg);
break;
case SubobjectAdjustment::MemberPointerAdjustment:
// FIXME: Unimplemented.
State = State->invalidateRegions(Reg, InitWithAdjustments,
currBldrCtx->blockCount(), LC, true,
nullptr, nullptr, nullptr);
return State;
}
}
// What remains is to copy the value of the object to the new region.
// FIXME: In other words, what we should always do is copy value of the
// Init expression (which corresponds to the bigger object) to the whole
// temporary region TR. However, this value is often no longer present
// in the Environment. If it has disappeared, we instead invalidate TR.
// Still, what we can do is assign the value of expression Ex (which
// corresponds to the sub-object) to the TR's sub-region Reg. At least,
// values inside Reg would be correct.
SVal InitVal = State->getSVal(Init, LC);
if (InitVal.isUnknown()) {
InitVal = getSValBuilder().conjureSymbolVal(Result, LC, Init->getType(),
currBldrCtx->blockCount());
State = State->bindLoc(BaseReg.castAs<Loc>(), InitVal, LC, false);
// Then we'd need to take the value that certainly exists and bind it
// over.
if (InitValWithAdjustments.isUnknown()) {
// Try to recover some path sensitivity in case we couldn't
// compute the value.
InitValWithAdjustments = getSValBuilder().conjureSymbolVal(
Result, LC, InitWithAdjustments->getType(),
currBldrCtx->blockCount());
}
State =
State->bindLoc(Reg.castAs<Loc>(), InitValWithAdjustments, LC, false);
} else {
State = State->bindLoc(BaseReg.castAs<Loc>(), InitVal, LC, false);
}
// The result expression would now point to the correct sub-region of the
// newly created temporary region. Do this last in order to getSVal of Init
// correctly in case (Result == Init).
if (Result->isGLValue()) {
State = State->BindExpr(Result, LC, Reg);
} else {
State = State->BindExpr(Result, LC, InitValWithAdjustments);
}
// Notify checkers once for two bindLoc()s.
State = processRegionChange(State, TR, LC);
if (OutRegionWithAdjustments)
*OutRegionWithAdjustments = cast<SubRegion>(Reg.getAsRegion());
return State;
}
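// Illustration of the adjustment logic above, roughly: for
// `const A &a = B().a_field;`, the materialized temporary is the whole B
// object; the recorded field adjustment is then re-applied to the fresh
// temporary region so that the result refers to the `a_field` sub-region
// rather than to the base object.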
ProgramStateRef ExprEngine::setIndexOfElementToConstruct(
ProgramStateRef State, const CXXConstructExpr *E,
const LocationContext *LCtx, unsigned Idx) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(!State->contains<IndexOfElementToConstruct>(Key) || Idx > 0);
return State->set<IndexOfElementToConstruct>(Key, Idx);
}
std::optional<unsigned>
ExprEngine::getPendingInitLoop(ProgramStateRef State, const CXXConstructExpr *E,
const LocationContext *LCtx) {
const unsigned *V = State->get<PendingInitLoop>({E, LCtx->getStackFrame()});
return V ? std::make_optional(*V) : std::nullopt;
}
ProgramStateRef ExprEngine::removePendingInitLoop(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(E && State->contains<PendingInitLoop>(Key));
return State->remove<PendingInitLoop>(Key);
}
ProgramStateRef ExprEngine::setPendingInitLoop(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx,
unsigned Size) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(!State->contains<PendingInitLoop>(Key) && Size > 0);
return State->set<PendingInitLoop>(Key, Size);
}
std::optional<unsigned>
ExprEngine::getIndexOfElementToConstruct(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
const unsigned *V =
State->get<IndexOfElementToConstruct>({E, LCtx->getStackFrame()});
return V ? std::make_optional(*V) : std::nullopt;
}
ProgramStateRef
ExprEngine::removeIndexOfElementToConstruct(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(E && State->contains<IndexOfElementToConstruct>(Key));
return State->remove<IndexOfElementToConstruct>(Key);
}
std::optional<unsigned>
ExprEngine::getPendingArrayDestruction(ProgramStateRef State,
const LocationContext *LCtx) {
assert(LCtx && "LocationContext shouldn't be null!");
const unsigned *V =
State->get<PendingArrayDestruction>(LCtx->getStackFrame());
return V ? std::make_optional(*V) : std::nullopt;
}
ProgramStateRef ExprEngine::setPendingArrayDestruction(
ProgramStateRef State, const LocationContext *LCtx, unsigned Idx) {
assert(LCtx && "LocationContext shouldn't be null!");
auto Key = LCtx->getStackFrame();
return State->set<PendingArrayDestruction>(Key, Idx);
}
ProgramStateRef
ExprEngine::removePendingArrayDestruction(ProgramStateRef State,
const LocationContext *LCtx) {
assert(LCtx && "LocationContext shouldn't be null!");
auto Key = LCtx->getStackFrame();
assert(LCtx && State->contains<PendingArrayDestruction>(Key));
return State->remove<PendingArrayDestruction>(Key);
}
ProgramStateRef
ExprEngine::addObjectUnderConstruction(ProgramStateRef State,
const ConstructionContextItem &Item,
const LocationContext *LC, SVal V) {
ConstructedObjectKey Key(Item, LC->getStackFrame());
const Expr *Init = nullptr;
if (auto DS = dyn_cast_or_null<DeclStmt>(Item.getStmtOrNull())) {
if (auto VD = dyn_cast_or_null<VarDecl>(DS->getSingleDecl()))
Init = VD->getInit();
}
if (auto LE = dyn_cast_or_null<LambdaExpr>(Item.getStmtOrNull()))
Init = *(LE->capture_init_begin() + Item.getIndex());
if (!Init && !Item.getStmtOrNull())
Init = Item.getCXXCtorInitializer()->getInit();
// In an ArrayInitLoopExpr the real initializer is returned by
// getSubExpr(). Note that AILEs can be nested in case of
// multidimensional arrays.
if (const auto *AILE = dyn_cast_or_null<ArrayInitLoopExpr>(Init))
Init = extractElementInitializerFromNestedAILE(AILE);
// FIXME: Currently the state might already contain the marker due to
// incorrect handling of temporaries bound to default parameters.
// The state will already contain the marker if we construct elements
// in an array, as we visit the same statement multiple times before
// the array declaration. The marker is removed when we exit the
// constructor call.
assert((!State->get<ObjectsUnderConstruction>(Key) ||
Key.getItem().getKind() ==
ConstructionContextItem::TemporaryDestructorKind ||
State->contains<IndexOfElementToConstruct>(
{dyn_cast_or_null<CXXConstructExpr>(Init), LC})) &&
"The object is already marked as `UnderConstruction`, when it's not "
"supposed to!");
return State->set<ObjectsUnderConstruction>(Key, V);
}
std::optional<SVal>
ExprEngine::getObjectUnderConstruction(ProgramStateRef State,
const ConstructionContextItem &Item,
const LocationContext *LC) {
ConstructedObjectKey Key(Item, LC->getStackFrame());
const SVal *V = State->get<ObjectsUnderConstruction>(Key);
return V ? std::make_optional(*V) : std::nullopt;
}
ProgramStateRef
ExprEngine::finishObjectConstruction(ProgramStateRef State,
const ConstructionContextItem &Item,
const LocationContext *LC) {
ConstructedObjectKey Key(Item, LC->getStackFrame());
assert(State->contains<ObjectsUnderConstruction>(Key));
return State->remove<ObjectsUnderConstruction>(Key);
}
ProgramStateRef ExprEngine::elideDestructor(ProgramStateRef State,
const CXXBindTemporaryExpr *BTE,
const LocationContext *LC) {
ConstructedObjectKey Key({BTE, /*IsElided=*/true}, LC);
// FIXME: Currently the state might already contain the marker due to
// incorrect handling of temporaries bound to default parameters.
return State->set<ObjectsUnderConstruction>(Key, UnknownVal());
}
ProgramStateRef
ExprEngine::cleanupElidedDestructor(ProgramStateRef State,
const CXXBindTemporaryExpr *BTE,
const LocationContext *LC) {
ConstructedObjectKey Key({BTE, /*IsElided=*/true}, LC);
assert(State->contains<ObjectsUnderConstruction>(Key));
return State->remove<ObjectsUnderConstruction>(Key);
}
bool ExprEngine::isDestructorElided(ProgramStateRef State,
const CXXBindTemporaryExpr *BTE,
const LocationContext *LC) {
ConstructedObjectKey Key({BTE, /*IsElided=*/true}, LC);
return State->contains<ObjectsUnderConstruction>(Key);
}
bool ExprEngine::areAllObjectsFullyConstructed(ProgramStateRef State,
const LocationContext *FromLC,
const LocationContext *ToLC) {
const LocationContext *LC = FromLC;
while (LC != ToLC) {
assert(LC && "ToLC must be a parent of FromLC!");
for (auto I : State->get<ObjectsUnderConstruction>())
if (I.first.getLocationContext() == LC)
return false;
LC = LC->getParent();
}
return true;
}
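// In other words: walking from FromLC up to ToLC, the check above fails as
// soon as any location context on that path still owns an entry in
// ObjectsUnderConstruction, i.e. some object in that part of the stack has
// been started but not yet finished constructing.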
//===----------------------------------------------------------------------===//
// Top-level transfer function logic (Dispatcher).
//===----------------------------------------------------------------------===//
/// evalAssume - Called by ConstraintManager. Used to call checker-specific
/// logic for handling assumptions on symbolic values.
ProgramStateRef ExprEngine::processAssume(ProgramStateRef state,
SVal cond, bool assumption) {
return getCheckerManager().runCheckersForEvalAssume(state, cond, assumption);
}
ProgramStateRef
ExprEngine::processRegionChanges(ProgramStateRef state,
const InvalidatedSymbols *invalidated,
ArrayRef<const MemRegion *> Explicits,
ArrayRef<const MemRegion *> Regions,
const LocationContext *LCtx,
const CallEvent *Call) {
return getCheckerManager().runCheckersForRegionChanges(state, invalidated,
Explicits, Regions,
LCtx, Call);
}
static void
printObjectsUnderConstructionJson(raw_ostream &Out, ProgramStateRef State,
const char *NL, const LocationContext *LCtx,
unsigned int Space = 0, bool IsDot = false) {
PrintingPolicy PP =
LCtx->getAnalysisDeclContext()->getASTContext().getPrintingPolicy();
++Space;
bool HasItem = false;
// Store the last key.
const ConstructedObjectKey *LastKey = nullptr;
for (const auto &I : State->get<ObjectsUnderConstruction>()) {
const ConstructedObjectKey &Key = I.first;
if (Key.getLocationContext() != LCtx)
continue;
if (!HasItem) {
Out << '[' << NL;
HasItem = true;
}
LastKey = &Key;
}
for (const auto &I : State->get<ObjectsUnderConstruction>()) {
const ConstructedObjectKey &Key = I.first;
SVal Value = I.second;
if (Key.getLocationContext() != LCtx)
continue;
Indent(Out, Space, IsDot) << "{ ";
Key.printJson(Out, nullptr, PP);
Out << ", \"value\": \"" << Value << "\" }";
if (&Key != LastKey)
Out << ',';
Out << NL;
}
if (HasItem)
Indent(Out, --Space, IsDot) << ']'; // End of "location_context".
else {
Out << "null ";
}
}
static void printIndicesOfElementsToConstructJson(
raw_ostream &Out, ProgramStateRef State, const char *NL,
const LocationContext *LCtx, unsigned int Space = 0, bool IsDot = false) {
using KeyT = std::pair<const Expr *, const LocationContext *>;
const auto &Context = LCtx->getAnalysisDeclContext()->getASTContext();
PrintingPolicy PP = Context.getPrintingPolicy();
++Space;
bool HasItem = false;
// Store the last key.
KeyT LastKey;
for (const auto &I : State->get<IndexOfElementToConstruct>()) {
const KeyT &Key = I.first;
if (Key.second != LCtx)
continue;
if (!HasItem) {
Out << '[' << NL;
HasItem = true;
}
LastKey = Key;
}
for (const auto &I : State->get<IndexOfElementToConstruct>()) {
const KeyT &Key = I.first;
unsigned Value = I.second;
if (Key.second != LCtx)
continue;
Indent(Out, Space, IsDot) << "{ ";
// Expr
const Expr *E = Key.first;
Out << "\"stmt_id\": " << E->getID(Context);
// Kind
Out << ", \"kind\": null";
// Pretty-print
Out << ", \"pretty\": ";
Out << "\"" << E->getStmtClassName() << ' '
<< E->getSourceRange().printToString(Context.getSourceManager()) << " '"
<< QualType::getAsString(E->getType().split(), PP);
Out << "'\"";
Out << ", \"value\": \"Current index: " << Value - 1 << "\" }";
if (Key != LastKey)
Out << ',';
Out << NL;
}
if (HasItem)
Indent(Out, --Space, IsDot) << ']'; // End of "location_context".
else {
Out << "null ";
}
}
static void printPendingInitLoopJson(raw_ostream &Out, ProgramStateRef State,
const char *NL,
const LocationContext *LCtx,
unsigned int Space = 0,
bool IsDot = false) {
using KeyT = std::pair<const CXXConstructExpr *, const LocationContext *>;
const auto &Context = LCtx->getAnalysisDeclContext()->getASTContext();
PrintingPolicy PP = Context.getPrintingPolicy();
++Space;
bool HasItem = false;
// Store the last key.
KeyT LastKey;
for (const auto &I : State->get<PendingInitLoop>()) {
const KeyT &Key = I.first;
if (Key.second != LCtx)
continue;
if (!HasItem) {
Out << '[' << NL;
HasItem = true;
}
LastKey = Key;
}
for (const auto &I : State->get<PendingInitLoop>()) {
const KeyT &Key = I.first;
unsigned Value = I.second;
if (Key.second != LCtx)
continue;
Indent(Out, Space, IsDot) << "{ ";
const CXXConstructExpr *E = Key.first;
Out << "\"stmt_id\": " << E->getID(Context);
Out << ", \"kind\": null";
Out << ", \"pretty\": ";
Out << '\"' << E->getStmtClassName() << ' '
<< E->getSourceRange().printToString(Context.getSourceManager()) << " '"
<< QualType::getAsString(E->getType().split(), PP);
Out << "'\"";
Out << ", \"value\": \"Flattened size: " << Value << "\"}";
if (Key != LastKey)
Out << ',';
Out << NL;
}
if (HasItem)
Indent(Out, --Space, IsDot) << ']'; // End of "location_context".
else {
Out << "null ";
}
}
static void
printPendingArrayDestructionsJson(raw_ostream &Out, ProgramStateRef State,
const char *NL, const LocationContext *LCtx,
unsigned int Space = 0, bool IsDot = false) {
using KeyT = const LocationContext *;
++Space;
bool HasItem = false;
// Store the last key.
KeyT LastKey = nullptr;
for (const auto &I : State->get<PendingArrayDestruction>()) {
const KeyT &Key = I.first;
if (Key != LCtx)
continue;
if (!HasItem) {
Out << '[' << NL;
HasItem = true;
}
LastKey = Key;
}
for (const auto &I : State->get<PendingArrayDestruction>()) {
const KeyT &Key = I.first;
if (Key != LCtx)
continue;
Indent(Out, Space, IsDot) << "{ ";
Out << "\"stmt_id\": null";
Out << ", \"kind\": null";
Out << ", \"pretty\": \"Current index: \"";
Out << ", \"value\": \"" << I.second << "\" }";
if (Key != LastKey)
Out << ',';
Out << NL;
}
if (HasItem)
Indent(Out, --Space, IsDot) << ']'; // End of "location_context".
else {
Out << "null ";
}
}
/// A helper function to generalize program state trait printing.
/// The function invokes Printer as 'Printer(Out, State, NL, LC, Space, IsDot,
/// std::forward<Args>(args)...)'. \n One possible type for Printer is
/// 'void()(raw_ostream &, ProgramStateRef, const char *, const LocationContext
/// *, unsigned int, bool, ...)' \n \param Trait The state trait to be printed.
/// \param Printer A void function that prints Trait.
/// \param Args An additional parameter pack that is passed to Printer upon
/// invocation.
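/// For example, as used below:
/// \code
///   printStateTraitWithLocationContextJson<ObjectsUnderConstruction>(
///       Out, State, LCtx, NL, Space, IsDot, "constructing_objects",
///       printObjectsUnderConstructionJson);
/// \endcode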
template <typename Trait, typename Printer, typename... Args>
static void printStateTraitWithLocationContextJson(
raw_ostream &Out, ProgramStateRef State, const LocationContext *LCtx,
const char *NL, unsigned int Space, bool IsDot,
const char *jsonPropertyName, Printer printer, Args &&...args) {
using RequiredType =
void (*)(raw_ostream &, ProgramStateRef, const char *,
const LocationContext *, unsigned int, bool, Args &&...);
// Try to do as much compile time checking as possible.
// FIXME: check for invocable instead of function?
static_assert(std::is_function_v<std::remove_pointer_t<Printer>>,
"Printer is not a function!");
static_assert(std::is_convertible_v<Printer, RequiredType>,
"Printer doesn't have the required type!");
if (LCtx && !State->get<Trait>().isEmpty()) {
Indent(Out, Space, IsDot) << '\"' << jsonPropertyName << "\": ";
++Space;
Out << '[' << NL;
LCtx->printJson(Out, NL, Space, IsDot, [&](const LocationContext *LC) {
printer(Out, State, NL, LC, Space, IsDot, std::forward<Args>(args)...);
});
--Space;
Indent(Out, Space, IsDot) << "]," << NL; // End of "jsonPropertyName".
}
}
void ExprEngine::printJson(raw_ostream &Out, ProgramStateRef State,
const LocationContext *LCtx, const char *NL,
unsigned int Space, bool IsDot) const {
printStateTraitWithLocationContextJson<ObjectsUnderConstruction>(
Out, State, LCtx, NL, Space, IsDot, "constructing_objects",
printObjectsUnderConstructionJson);
printStateTraitWithLocationContextJson<IndexOfElementToConstruct>(
Out, State, LCtx, NL, Space, IsDot, "index_of_element",
printIndicesOfElementsToConstructJson);
printStateTraitWithLocationContextJson<PendingInitLoop>(
Out, State, LCtx, NL, Space, IsDot, "pending_init_loops",
printPendingInitLoopJson);
printStateTraitWithLocationContextJson<PendingArrayDestruction>(
Out, State, LCtx, NL, Space, IsDot, "pending_destructors",
printPendingArrayDestructionsJson);
getCheckerManager().runCheckersForPrintStateJson(Out, State, NL, Space,
IsDot);
}
void ExprEngine::processEndWorklist() {
// This prints the name of the top-level function if we crash.
PrettyStackTraceLocationContext CrashInfo(getRootLocationContext());
getCheckerManager().runCheckersForEndAnalysis(G, BR, *this);
}
void ExprEngine::processCFGElement(const CFGElement E, ExplodedNode *Pred,
unsigned StmtIdx, NodeBuilderContext *Ctx) {
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
currStmtIdx = StmtIdx;
currBldrCtx = Ctx;
switch (E.getKind()) {
case CFGElement::Statement:
case CFGElement::Constructor:
case CFGElement::CXXRecordTypedCall:
ProcessStmt(E.castAs<CFGStmt>().getStmt(), Pred);
return;
case CFGElement::Initializer:
ProcessInitializer(E.castAs<CFGInitializer>(), Pred);
return;
case CFGElement::NewAllocator:
ProcessNewAllocator(E.castAs<CFGNewAllocator>().getAllocatorExpr(),
Pred);
return;
case CFGElement::AutomaticObjectDtor:
case CFGElement::DeleteDtor:
case CFGElement::BaseDtor:
case CFGElement::MemberDtor:
case CFGElement::TemporaryDtor:
ProcessImplicitDtor(E.castAs<CFGImplicitDtor>(), Pred);
return;
case CFGElement::LoopExit:
ProcessLoopExit(E.castAs<CFGLoopExit>().getLoopStmt(), Pred);
return;
case CFGElement::LifetimeEnds:
case CFGElement::CleanupFunction:
case CFGElement::ScopeBegin:
case CFGElement::ScopeEnd:
return;
}
}
static bool shouldRemoveDeadBindings(AnalysisManager &AMgr,
const Stmt *S,
const ExplodedNode *Pred,
const LocationContext *LC) {
// Are we never purging state values?
if (AMgr.options.AnalysisPurgeOpt == PurgeNone)
return false;
// Is this the beginning of a basic block?
if (Pred->getLocation().getAs<BlockEntrance>())
return true;
// Is this a non-expression?
if (!isa<Expr>(S))
return true;
// Run before processing a call.
if (CallEvent::isCallStmt(S))
return true;
// Is this an expression that is consumed by another expression? If so,
// postpone cleaning out the state.
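// E.g. in 'int z = x + y;' the 'x + y' subexpression is consumed by the
// enclosing declaration, so its binding must survive until that statement
// is processed (illustrative).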
ParentMap &PM = LC->getAnalysisDeclContext()->getParentMap();
return !PM.isConsumedExpr(cast<Expr>(S));
}
void ExprEngine::removeDead(ExplodedNode *Pred, ExplodedNodeSet &Out,
const Stmt *ReferenceStmt,
const LocationContext *LC,
const Stmt *DiagnosticStmt,
ProgramPoint::Kind K) {
assert((K == ProgramPoint::PreStmtPurgeDeadSymbolsKind ||
ReferenceStmt == nullptr || isa<ReturnStmt>(ReferenceStmt))
&& "PostStmt is not generally supported by the SymbolReaper yet");
assert(LC && "Must pass the current (or expiring) LocationContext");
if (!DiagnosticStmt) {
DiagnosticStmt = ReferenceStmt;
assert(DiagnosticStmt && "Required for clearing a LocationContext");
}
NumRemoveDeadBindings++;
ProgramStateRef CleanedState = Pred->getState();
// LC is the location context being destroyed, but SymbolReaper wants a
// location context that is still live. (If this is the top-level stack
// frame, this will be null.)
if (!ReferenceStmt) {
assert(K == ProgramPoint::PostStmtPurgeDeadSymbolsKind &&
"Use PostStmtPurgeDeadSymbolsKind for clearing a LocationContext");
LC = LC->getParent();
}
const StackFrameContext *SFC = LC ? LC->getStackFrame() : nullptr;
SymbolReaper SymReaper(SFC, ReferenceStmt, SymMgr, getStoreManager());
for (auto I : CleanedState->get<ObjectsUnderConstruction>()) {
if (SymbolRef Sym = I.second.getAsSymbol())
SymReaper.markLive(Sym);
if (const MemRegion *MR = I.second.getAsRegion())
SymReaper.markLive(MR);
}
getCheckerManager().runCheckersForLiveSymbols(CleanedState, SymReaper);
// Create a state in which dead bindings are removed from the environment
// and the store. TODO: The function should just return new env and store,
// not a new state.
CleanedState = StateMgr.removeDeadBindingsFromEnvironmentAndStore(
CleanedState, SFC, SymReaper);
// Process any special transfer function for dead symbols.
// A tag to track convenience transitions, which can be removed at cleanup.
static SimpleProgramPointTag cleanupTag(TagProviderName, "Clean Node");
// Call checkers with the non-cleaned state so that they can query the
// values of the soon-to-be-dead symbols.
ExplodedNodeSet CheckedSet;
getCheckerManager().runCheckersForDeadSymbols(CheckedSet, Pred, SymReaper,
DiagnosticStmt, *this, K);
// For each node in CheckedSet, generate CleanedNodes that have the
// environment, the store, and the constraints cleaned up but have the
// user-supplied states as the predecessors.
StmtNodeBuilder Bldr(CheckedSet, Out, *currBldrCtx);
for (const auto I : CheckedSet) {
ProgramStateRef CheckerState = I->getState();
// The constraint manager has not been cleaned up yet, so clean up now.
CheckerState =
getConstraintManager().removeDeadBindings(CheckerState, SymReaper);
assert(StateMgr.haveEqualEnvironments(CheckerState, Pred->getState()) &&
"Checkers are not allowed to modify the Environment as a part of "
"checkDeadSymbols processing.");
assert(StateMgr.haveEqualStores(CheckerState, Pred->getState()) &&
"Checkers are not allowed to modify the Store as a part of "
"checkDeadSymbols processing.");
// Create a state based on CleanedState with CheckerState GDM and
// generate a transition to that state.
ProgramStateRef CleanedCheckerSt =
StateMgr.getPersistentStateWithGDM(CleanedState, CheckerState);
Bldr.generateNode(DiagnosticStmt, I, CleanedCheckerSt, &cleanupTag, K);
}
}
void ExprEngine::ProcessStmt(const Stmt *currStmt, ExplodedNode *Pred) {
// Reclaim any unnecessary nodes in the ExplodedGraph.
G.reclaimRecentlyAllocatedNodes();
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
currStmt->getBeginLoc(),
"Error evaluating statement");
// Remove dead bindings and symbols.
ExplodedNodeSet CleanedStates;
if (shouldRemoveDeadBindings(AMgr, currStmt, Pred,
Pred->getLocationContext())) {
removeDead(Pred, CleanedStates, currStmt,
Pred->getLocationContext());
} else
CleanedStates.Add(Pred);
// Visit the statement.
ExplodedNodeSet Dst;
for (const auto I : CleanedStates) {
ExplodedNodeSet DstI;
// Visit the statement.
Visit(currStmt, I, DstI);
Dst.insert(DstI);
}
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
void ExprEngine::ProcessLoopExit(const Stmt* S, ExplodedNode *Pred) {
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
S->getBeginLoc(),
"Error evaluating end of the loop");
ExplodedNodeSet Dst;
Dst.Add(Pred);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
ProgramStateRef NewState = Pred->getState();
if(AMgr.options.ShouldUnrollLoops)
NewState = processLoopEnd(S, NewState);
LoopExit PP(S, Pred->getLocationContext());
Bldr.generateNode(PP, NewState, Pred);
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
void ExprEngine::ProcessInitializer(const CFGInitializer CFGInit,
ExplodedNode *Pred) {
const CXXCtorInitializer *BMI = CFGInit.getInitializer();
const Expr *Init = BMI->getInit()->IgnoreImplicit();
const LocationContext *LC = Pred->getLocationContext();
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
BMI->getSourceLocation(),
"Error evaluating initializer");
// We don't clean up dead bindings here.
const auto *stackFrame = cast<StackFrameContext>(Pred->getLocationContext());
const auto *decl = cast<CXXConstructorDecl>(stackFrame->getDecl());
ProgramStateRef State = Pred->getState();
SVal thisVal = State->getSVal(svalBuilder.getCXXThis(decl, stackFrame));
ExplodedNodeSet Tmp;
SVal FieldLoc;
// Evaluate the initializer, if necessary
if (BMI->isAnyMemberInitializer()) {
// Constructors build the object directly in the field,
// but non-objects must be copied in from the initializer.
if (getObjectUnderConstruction(State, BMI, LC)) {
// The field was directly constructed, so there is no need to bind.
// But we still need to stop tracking the object under construction.
State = finishObjectConstruction(State, BMI, LC);
NodeBuilder Bldr(Pred, Tmp, *currBldrCtx);
PostStore PS(Init, LC, /*Loc*/ nullptr, /*tag*/ nullptr);
Bldr.generateNode(PS, State, Pred);
} else {
const ValueDecl *Field;
if (BMI->isIndirectMemberInitializer()) {
Field = BMI->getIndirectMember();
FieldLoc = State->getLValue(BMI->getIndirectMember(), thisVal);
} else {
Field = BMI->getMember();
FieldLoc = State->getLValue(BMI->getMember(), thisVal);
}
SVal InitVal;
if (Init->getType()->isArrayType()) {
// Handle arrays of trivial type. We can represent this with a
// primitive load/copy from the base array region.
const ArraySubscriptExpr *ASE;
while ((ASE = dyn_cast<ArraySubscriptExpr>(Init)))
Init = ASE->getBase()->IgnoreImplicit();
SVal LValue = State->getSVal(Init, stackFrame);
if (!Field->getType()->isReferenceType())
if (std::optional<Loc> LValueLoc = LValue.getAs<Loc>())
InitVal = State->getSVal(*LValueLoc);
// If we fail to get the value for some reason, use a symbolic value.
if (InitVal.isUnknownOrUndef()) {
SValBuilder &SVB = getSValBuilder();
InitVal = SVB.conjureSymbolVal(BMI->getInit(), stackFrame,
Field->getType(),
currBldrCtx->blockCount());
}
} else {
InitVal = State->getSVal(BMI->getInit(), stackFrame);
}
PostInitializer PP(BMI, FieldLoc.getAsRegion(), stackFrame);
evalBind(Tmp, Init, Pred, FieldLoc, InitVal, /*isInit=*/true, &PP);
}
} else if (BMI->isBaseInitializer() && isa<InitListExpr>(Init)) {
// When the base class is initialized with an initialization list and the
// base class does not have a ctor, there will not be a CXXConstructExpr to
// initialize the base region. Hence, we need to make the bind for it.
SVal BaseLoc = getStoreManager().evalDerivedToBase(
thisVal, QualType(BMI->getBaseClass(), 0), BMI->isBaseVirtual());
SVal InitVal = State->getSVal(Init, stackFrame);
evalBind(Tmp, Init, Pred, BaseLoc, InitVal, /*isInit=*/true);
} else {
assert(BMI->isBaseInitializer() || BMI->isDelegatingInitializer());
Tmp.insert(Pred);
// We already did all the work when visiting the CXXConstructExpr.
}
// Construct PostInitializer nodes whether the state changed or not,
// so that the diagnostics don't get confused.
PostInitializer PP(BMI, FieldLoc.getAsRegion(), stackFrame);
ExplodedNodeSet Dst;
NodeBuilder Bldr(Tmp, Dst, *currBldrCtx);
for (const auto I : Tmp) {
ProgramStateRef State = I->getState();
Bldr.generateNode(PP, State, I);
}
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
std::pair<ProgramStateRef, uint64_t>
ExprEngine::prepareStateForArrayDestruction(const ProgramStateRef State,
const MemRegion *Region,
const QualType &ElementTy,
const LocationContext *LCtx,
SVal *ElementCountVal) {
assert(Region != nullptr && "Not-null region expected");
QualType Ty = ElementTy.getDesugaredType(getContext());
while (const auto *NTy = dyn_cast<ArrayType>(Ty))
Ty = NTy->getElementType().getDesugaredType(getContext());
auto ElementCount = getDynamicElementCount(State, Region, svalBuilder, Ty);
if (ElementCountVal)
*ElementCountVal = ElementCount;
// Note: the destructors are called in reverse order.
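// E.g. for 'S arr[3]' the pending index proceeds 2 -> 1 -> 0 as successive
// destructor calls are prepared (illustrative).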
unsigned Idx = 0;
if (auto OptionalIdx = getPendingArrayDestruction(State, LCtx)) {
Idx = *OptionalIdx;
} else {
// The element count is either unknown, or an SVal that's not an integer.
if (!ElementCount.isConstant())
return {State, 0};
Idx = ElementCount.getAsInteger()->getLimitedValue();
}
if (Idx == 0)
return {State, 0};
--Idx;
return {setPendingArrayDestruction(State, LCtx, Idx), Idx};
}
void ExprEngine::ProcessImplicitDtor(const CFGImplicitDtor D,
ExplodedNode *Pred) {
ExplodedNodeSet Dst;
switch (D.getKind()) {
case CFGElement::AutomaticObjectDtor:
ProcessAutomaticObjDtor(D.castAs<CFGAutomaticObjDtor>(), Pred, Dst);
break;
case CFGElement::BaseDtor:
ProcessBaseDtor(D.castAs<CFGBaseDtor>(), Pred, Dst);
break;
case CFGElement::MemberDtor:
ProcessMemberDtor(D.castAs<CFGMemberDtor>(), Pred, Dst);
break;
case CFGElement::TemporaryDtor:
ProcessTemporaryDtor(D.castAs<CFGTemporaryDtor>(), Pred, Dst);
break;
case CFGElement::DeleteDtor:
ProcessDeleteDtor(D.castAs<CFGDeleteDtor>(), Pred, Dst);
break;
default:
llvm_unreachable("Unexpected dtor kind.");
}
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
void ExprEngine::ProcessNewAllocator(const CXXNewExpr *NE,
ExplodedNode *Pred) {
ExplodedNodeSet Dst;
AnalysisManager &AMgr = getAnalysisManager();
AnalyzerOptions &Opts = AMgr.options;
// TODO: We're not evaluating allocators for all cases just yet as
// we're not handling the return value correctly, which causes false
// positives when the alpha.cplusplus.NewDeleteLeaks check is on.
if (Opts.MayInlineCXXAllocator)
VisitCXXNewAllocatorCall(NE, Pred, Dst);
else {
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
const LocationContext *LCtx = Pred->getLocationContext();
PostImplicitCall PP(NE->getOperatorNew(), NE->getBeginLoc(), LCtx,
getCFGElementRef());
Bldr.generateNode(PP, Pred->getState(), Pred);
}
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
void ExprEngine::ProcessAutomaticObjDtor(const CFGAutomaticObjDtor Dtor,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
const auto *DtorDecl = Dtor.getDestructorDecl(getContext());
const VarDecl *varDecl = Dtor.getVarDecl();
QualType varType = varDecl->getType();
ProgramStateRef state = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
SVal dest = state->getLValue(varDecl, LCtx);
const MemRegion *Region = dest.castAs<loc::MemRegionVal>().getRegion();
if (varType->isReferenceType()) {
const MemRegion *ValueRegion = state->getSVal(Region).getAsRegion();
if (!ValueRegion) {
// FIXME: This should not happen. The language guarantees the presence
// of a valid initializer here, so the reference cannot be undefined.
// It seems that we're calling destructors for variables that
// were not initialized yet.
return;
}
Region = ValueRegion->getBaseRegion();
varType = cast<TypedValueRegion>(Region)->getValueType();
}
unsigned Idx = 0;
if (isa<ArrayType>(varType)) {
SVal ElementCount;
std::tie(state, Idx) = prepareStateForArrayDestruction(
state, Region, varType, LCtx, &ElementCount);
if (ElementCount.isConstant()) {
uint64_t ArrayLength = ElementCount.getAsInteger()->getLimitedValue();
assert(ArrayLength &&
"An automatic dtor for a 0 length array shouldn't be triggered!");
// Still handle this case if we don't have assertions enabled.
if (!ArrayLength) {
static SimpleProgramPointTag PT(
"ExprEngine", "Skipping automatic 0 length array destruction, "
"which shouldn't be in the CFG.");
PostImplicitCall PP(DtorDecl, varDecl->getLocation(), LCtx,
getCFGElementRef(), &PT);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
Bldr.generateSink(PP, Pred->getState(), Pred);
return;
}
}
}
EvalCallOptions CallOpts;
Region = makeElementRegion(state, loc::MemRegionVal(Region), varType,
CallOpts.IsArrayCtorOrDtor, Idx)
.getAsRegion();
NodeBuilder Bldr(Pred, Dst, getBuilderContext());
static SimpleProgramPointTag PT("ExprEngine",
"Prepare for object destruction");
PreImplicitCall PP(DtorDecl, varDecl->getLocation(), LCtx, getCFGElementRef(),
&PT);
Pred = Bldr.generateNode(PP, state, Pred);
if (!Pred)
return;
Bldr.takeNodes(Pred);
VisitCXXDestructor(varType, Region, Dtor.getTriggerStmt(),
/*IsBase=*/false, Pred, Dst, CallOpts);
}
void ExprEngine::ProcessDeleteDtor(const CFGDeleteDtor Dtor,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
ProgramStateRef State = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
const CXXDeleteExpr *DE = Dtor.getDeleteExpr();
const Stmt *Arg = DE->getArgument();
QualType DTy = DE->getDestroyedType();
SVal ArgVal = State->getSVal(Arg, LCtx);
// If the argument to delete is known to be a null value,
// don't run the destructor.
if (State->isNull(ArgVal).isConstrainedTrue()) {
QualType BTy = getContext().getBaseElementType(DTy);
const CXXRecordDecl *RD = BTy->getAsCXXRecordDecl();
const CXXDestructorDecl *Dtor = RD->getDestructor();
PostImplicitCall PP(Dtor, DE->getBeginLoc(), LCtx, getCFGElementRef());
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
Bldr.generateNode(PP, Pred->getState(), Pred);
return;
}
auto getDtorDecl = [](const QualType &DTy) {
const CXXRecordDecl *RD = DTy->getAsCXXRecordDecl();
return RD->getDestructor();
};
unsigned Idx = 0;
EvalCallOptions CallOpts;
const MemRegion *ArgR = ArgVal.getAsRegion();
if (DE->isArrayForm()) {
CallOpts.IsArrayCtorOrDtor = true;
// Yes, it may even be a multi-dimensional array.
while (const auto *AT = getContext().getAsArrayType(DTy))
DTy = AT->getElementType();
if (ArgR) {
SVal ElementCount;
std::tie(State, Idx) = prepareStateForArrayDestruction(
State, ArgR, DTy, LCtx, &ElementCount);
// If we're about to destruct a 0 length array, don't run any of the
// destructors.
if (ElementCount.isConstant() &&
ElementCount.getAsInteger()->getLimitedValue() == 0) {
static SimpleProgramPointTag PT(
"ExprEngine", "Skipping 0 length array delete destruction");
PostImplicitCall PP(getDtorDecl(DTy), DE->getBeginLoc(), LCtx,
getCFGElementRef(), &PT);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
Bldr.generateNode(PP, Pred->getState(), Pred);
return;
}
ArgR = State->getLValue(DTy, svalBuilder.makeArrayIndex(Idx), ArgVal)
.getAsRegion();
}
}
NodeBuilder Bldr(Pred, Dst, getBuilderContext());
static SimpleProgramPointTag PT("ExprEngine",
"Prepare for object destruction");
PreImplicitCall PP(getDtorDecl(DTy), DE->getBeginLoc(), LCtx,
getCFGElementRef(), &PT);
Pred = Bldr.generateNode(PP, State, Pred);
if (!Pred)
return;
Bldr.takeNodes(Pred);
VisitCXXDestructor(DTy, ArgR, DE, /*IsBase=*/false, Pred, Dst, CallOpts);
}
void ExprEngine::ProcessBaseDtor(const CFGBaseDtor D,
ExplodedNode *Pred, ExplodedNodeSet &Dst) {
const LocationContext *LCtx = Pred->getLocationContext();
const auto *CurDtor = cast<CXXDestructorDecl>(LCtx->getDecl());
Loc ThisPtr = getSValBuilder().getCXXThis(CurDtor,
LCtx->getStackFrame());
SVal ThisVal = Pred->getState()->getSVal(ThisPtr);
// Create the base object region.
const CXXBaseSpecifier *Base = D.getBaseSpecifier();
QualType BaseTy = Base->getType();
SVal BaseVal = getStoreManager().evalDerivedToBase(ThisVal, BaseTy,
Base->isVirtual());
EvalCallOptions CallOpts;
VisitCXXDestructor(BaseTy, BaseVal.getAsRegion(), CurDtor->getBody(),
/*IsBase=*/true, Pred, Dst, CallOpts);
}
void ExprEngine::ProcessMemberDtor(const CFGMemberDtor D,
ExplodedNode *Pred, ExplodedNodeSet &Dst) {
const auto *DtorDecl = D.getDestructorDecl(getContext());
const FieldDecl *Member = D.getFieldDecl();
QualType T = Member->getType();
ProgramStateRef State = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
const auto *CurDtor = cast<CXXDestructorDecl>(LCtx->getDecl());
Loc ThisStorageLoc =
getSValBuilder().getCXXThis(CurDtor, LCtx->getStackFrame());
Loc ThisLoc = State->getSVal(ThisStorageLoc).castAs<Loc>();
SVal FieldVal = State->getLValue(Member, ThisLoc);
unsigned Idx = 0;
if (isa<ArrayType>(T)) {
SVal ElementCount;
std::tie(State, Idx) = prepareStateForArrayDestruction(
State, FieldVal.getAsRegion(), T, LCtx, &ElementCount);
if (ElementCount.isConstant()) {
uint64_t ArrayLength = ElementCount.getAsInteger()->getLimitedValue();
assert(ArrayLength &&
"A member dtor for a 0 length array shouldn't be triggered!");
// Still handle this case if we don't have assertions enabled.
if (!ArrayLength) {
static SimpleProgramPointTag PT(
"ExprEngine", "Skipping member 0 length array destruction, which "
"shouldn't be in the CFG.");
PostImplicitCall PP(DtorDecl, Member->getLocation(), LCtx,
getCFGElementRef(), &PT);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
Bldr.generateSink(PP, Pred->getState(), Pred);
return;
}
}
}
EvalCallOptions CallOpts;
FieldVal =
makeElementRegion(State, FieldVal, T, CallOpts.IsArrayCtorOrDtor, Idx);
NodeBuilder Bldr(Pred, Dst, getBuilderContext());
static SimpleProgramPointTag PT("ExprEngine",
"Prepare for object destruction");
PreImplicitCall PP(DtorDecl, Member->getLocation(), LCtx, getCFGElementRef(),
&PT);
Pred = Bldr.generateNode(PP, State, Pred);
if (!Pred)
return;
Bldr.takeNodes(Pred);
VisitCXXDestructor(T, FieldVal.getAsRegion(), CurDtor->getBody(),
/*IsBase=*/false, Pred, Dst, CallOpts);
}
void ExprEngine::ProcessTemporaryDtor(const CFGTemporaryDtor D,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
const CXXBindTemporaryExpr *BTE = D.getBindTemporaryExpr();
ProgramStateRef State = Pred->getState();
const LocationContext *LC = Pred->getLocationContext();
const MemRegion *MR = nullptr;
if (std::optional<SVal> V = getObjectUnderConstruction(
State, D.getBindTemporaryExpr(), Pred->getLocationContext())) {
// FIXME: Currently we insert temporary destructors for default parameters,
// but we don't insert the constructors, so the entry in
// ObjectsUnderConstruction may be missing.
State = finishObjectConstruction(State, D.getBindTemporaryExpr(),
Pred->getLocationContext());
MR = V->getAsRegion();
}
// If copy elision has occurred, and the constructor corresponding to the
// destructor was elided, we need to skip the destructor as well.
if (isDestructorElided(State, BTE, LC)) {
State = cleanupElidedDestructor(State, BTE, LC);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
PostImplicitCall PP(D.getDestructorDecl(getContext()),
D.getBindTemporaryExpr()->getBeginLoc(),
Pred->getLocationContext(), getCFGElementRef());
Bldr.generateNode(PP, State, Pred);
return;
}
ExplodedNodeSet CleanDtorState;
StmtNodeBuilder StmtBldr(Pred, CleanDtorState, *currBldrCtx);
StmtBldr.generateNode(D.getBindTemporaryExpr(), Pred, State);
QualType T = D.getBindTemporaryExpr()->getSubExpr()->getType();
// FIXME: Currently CleanDtorState can be empty here due to temporaries being
// bound to default parameters.
assert(CleanDtorState.size() <= 1);
ExplodedNode *CleanPred =
CleanDtorState.empty() ? Pred : *CleanDtorState.begin();
EvalCallOptions CallOpts;
CallOpts.IsTemporaryCtorOrDtor = true;
if (!MR) {
// FIXME: If we have no MR, we still need to unwrap the array to avoid
// destroying the whole array at once.
//
// For this case there is no universal solution as there is no way to
// directly create an array of temporary objects. There are, however, some
// expressions that can create temporary objects and have an array type.
//
// E.g.: std::initializer_list<S>{S(), S()};
//
// The expression above has a type of 'const struct S[2]' but it's a single
// 'std::initializer_list<>'. The destructors of the 2 temporary 'S()'
// objects will be called anyway, because they are 2 separate objects in 2
// separate clusters, i.e.: not an array.
//
// Now the 'std::initializer_list<>' is not an array either even though it
// has the type of an array. The point is, we only want to invoke the
// destructor for the initializer list once, not twice.
while (const ArrayType *AT = getContext().getAsArrayType(T)) {
T = AT->getElementType();
// FIXME: Enable this flag once we handle this case properly.
// CallOpts.IsArrayCtorOrDtor = true;
}
} else {
// FIXME: We'd eventually need the makeElementRegion() trick here,
// but for now we don't have the respective construction contexts,
// so MR would always be null in this case. Do nothing for now.
}
VisitCXXDestructor(T, MR, D.getBindTemporaryExpr(),
/*IsBase=*/false, CleanPred, Dst, CallOpts);
}
void ExprEngine::processCleanupTemporaryBranch(const CXXBindTemporaryExpr *BTE,
NodeBuilderContext &BldCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF) {
BranchNodeBuilder TempDtorBuilder(Pred, Dst, BldCtx, DstT, DstF);
ProgramStateRef State = Pred->getState();
const LocationContext *LC = Pred->getLocationContext();
if (getObjectUnderConstruction(State, BTE, LC)) {
TempDtorBuilder.markInfeasible(false);
TempDtorBuilder.generateNode(State, true, Pred);
} else {
TempDtorBuilder.markInfeasible(true);
TempDtorBuilder.generateNode(State, false, Pred);
}
}
void ExprEngine::VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *BTE,
ExplodedNodeSet &PreVisit,
ExplodedNodeSet &Dst) {
// This is a fallback solution in case we didn't have a construction
// context when we were constructing the temporary. Otherwise the map should
// have been populated there.
if (!getAnalysisManager().options.ShouldIncludeTemporaryDtorsInCFG) {
// In case we don't have temporary destructors in the CFG, do not mark
// the initialization - we would otherwise never clean it up.
Dst = PreVisit;
return;
}
StmtNodeBuilder StmtBldr(PreVisit, Dst, *currBldrCtx);
for (ExplodedNode *Node : PreVisit) {
ProgramStateRef State = Node->getState();
const LocationContext *LC = Node->getLocationContext();
if (!getObjectUnderConstruction(State, BTE, LC)) {
// FIXME: Currently the state might also already contain the marker due to
// incorrect handling of temporaries bound to default parameters; for
// those, we currently skip the CXXBindTemporaryExpr but rely on adding
// temporary destructor nodes.
State = addObjectUnderConstruction(State, BTE, LC, UnknownVal());
}
StmtBldr.generateNode(BTE, Node, State);
}
}
ProgramStateRef ExprEngine::escapeValues(ProgramStateRef State,
ArrayRef<SVal> Vs,
PointerEscapeKind K,
const CallEvent *Call) const {
class CollectReachableSymbolsCallback final : public SymbolVisitor {
InvalidatedSymbols &Symbols;
public:
explicit CollectReachableSymbolsCallback(InvalidatedSymbols &Symbols)
: Symbols(Symbols) {}
const InvalidatedSymbols &getSymbols() const { return Symbols; }
bool VisitSymbol(SymbolRef Sym) override {
Symbols.insert(Sym);
return true;
}
};
InvalidatedSymbols Symbols;
CollectReachableSymbolsCallback CallBack(Symbols);
for (SVal V : Vs)
State->scanReachableSymbols(V, CallBack);
return getCheckerManager().runCheckersForPointerEscape(
State, CallBack.getSymbols(), Call, K, nullptr);
}
void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
ExplodedNodeSet &DstTop) {
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
S->getBeginLoc(), "Error evaluating statement");
ExplodedNodeSet Dst;
StmtNodeBuilder Bldr(Pred, DstTop, *currBldrCtx);
assert(!isa<Expr>(S) || S == cast<Expr>(S)->IgnoreParens());
switch (S->getStmtClass()) {
// C++, OpenMP and ARC stuff we don't support yet.
case Stmt::CXXDependentScopeMemberExprClass:
case Stmt::CXXTryStmtClass:
case Stmt::CXXTypeidExprClass:
case Stmt::CXXUuidofExprClass:
case Stmt::CXXFoldExprClass:
case Stmt::MSPropertyRefExprClass:
case Stmt::MSPropertySubscriptExprClass:
case Stmt::CXXUnresolvedConstructExprClass:
case Stmt::DependentScopeDeclRefExprClass:
case Stmt::ArrayTypeTraitExprClass:
case Stmt::ExpressionTraitExprClass:
case Stmt::UnresolvedLookupExprClass:
case Stmt::UnresolvedMemberExprClass:
case Stmt::TypoExprClass:
case Stmt::RecoveryExprClass:
case Stmt::CXXNoexceptExprClass:
case Stmt::PackExpansionExprClass:
case Stmt::PackIndexingExprClass:
case Stmt::SubstNonTypeTemplateParmPackExprClass:
case Stmt::FunctionParmPackExprClass:
case Stmt::CoroutineBodyStmtClass:
case Stmt::CoawaitExprClass:
case Stmt::DependentCoawaitExprClass:
case Stmt::CoreturnStmtClass:
case Stmt::CoyieldExprClass:
case Stmt::SEHTryStmtClass:
case Stmt::SEHExceptStmtClass:
case Stmt::SEHLeaveStmtClass:
case Stmt::SEHFinallyStmtClass:
case Stmt::OMPCanonicalLoopClass:
case Stmt::OMPParallelDirectiveClass:
case Stmt::OMPSimdDirectiveClass:
case Stmt::OMPForDirectiveClass:
case Stmt::OMPForSimdDirectiveClass:
case Stmt::OMPSectionsDirectiveClass:
case Stmt::OMPSectionDirectiveClass:
case Stmt::OMPScopeDirectiveClass:
case Stmt::OMPSingleDirectiveClass:
case Stmt::OMPMasterDirectiveClass:
case Stmt::OMPCriticalDirectiveClass:
case Stmt::OMPParallelForDirectiveClass:
case Stmt::OMPParallelForSimdDirectiveClass:
case Stmt::OMPParallelSectionsDirectiveClass:
case Stmt::OMPParallelMasterDirectiveClass:
case Stmt::OMPParallelMaskedDirectiveClass:
case Stmt::OMPTaskDirectiveClass:
case Stmt::OMPTaskyieldDirectiveClass:
case Stmt::OMPBarrierDirectiveClass:
case Stmt::OMPTaskwaitDirectiveClass:
case Stmt::OMPErrorDirectiveClass:
case Stmt::OMPTaskgroupDirectiveClass:
case Stmt::OMPFlushDirectiveClass:
case Stmt::OMPDepobjDirectiveClass:
case Stmt::OMPScanDirectiveClass:
case Stmt::OMPOrderedDirectiveClass:
case Stmt::OMPAtomicDirectiveClass:
case Stmt::OMPTargetDirectiveClass:
case Stmt::OMPTargetDataDirectiveClass:
case Stmt::OMPTargetEnterDataDirectiveClass:
case Stmt::OMPTargetExitDataDirectiveClass:
case Stmt::OMPTargetParallelDirectiveClass:
case Stmt::OMPTargetParallelForDirectiveClass:
case Stmt::OMPTargetUpdateDirectiveClass:
case Stmt::OMPTeamsDirectiveClass:
case Stmt::OMPCancellationPointDirectiveClass:
case Stmt::OMPCancelDirectiveClass:
case Stmt::OMPTaskLoopDirectiveClass:
case Stmt::OMPTaskLoopSimdDirectiveClass:
case Stmt::OMPMasterTaskLoopDirectiveClass:
case Stmt::OMPMaskedTaskLoopDirectiveClass:
case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
case Stmt::OMPMaskedTaskLoopSimdDirectiveClass:
case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
case Stmt::OMPParallelMaskedTaskLoopSimdDirectiveClass:
case Stmt::OMPDistributeDirectiveClass:
case Stmt::OMPDistributeParallelForDirectiveClass:
case Stmt::OMPDistributeParallelForSimdDirectiveClass:
case Stmt::OMPDistributeSimdDirectiveClass:
case Stmt::OMPTargetParallelForSimdDirectiveClass:
case Stmt::OMPTargetSimdDirectiveClass:
case Stmt::OMPTeamsDistributeDirectiveClass:
case Stmt::OMPTeamsDistributeSimdDirectiveClass:
case Stmt::OMPTeamsDistributeParallelForSimdDirectiveClass:
case Stmt::OMPTeamsDistributeParallelForDirectiveClass:
case Stmt::OMPTargetTeamsDirectiveClass:
case Stmt::OMPTargetTeamsDistributeDirectiveClass:
case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass:
case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
case Stmt::OMPReverseDirectiveClass:
case Stmt::OMPTileDirectiveClass:
case Stmt::OMPInterchangeDirectiveClass:
case Stmt::OMPInteropDirectiveClass:
case Stmt::OMPDispatchDirectiveClass:
case Stmt::OMPMaskedDirectiveClass:
case Stmt::OMPGenericLoopDirectiveClass:
case Stmt::OMPTeamsGenericLoopDirectiveClass:
case Stmt::OMPTargetTeamsGenericLoopDirectiveClass:
case Stmt::OMPParallelGenericLoopDirectiveClass:
case Stmt::OMPTargetParallelGenericLoopDirectiveClass:
case Stmt::CapturedStmtClass:
case Stmt::OpenACCComputeConstructClass:
case Stmt::OpenACCLoopConstructClass:
case Stmt::OMPUnrollDirectiveClass:
case Stmt::OMPMetaDirectiveClass: {
const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
Engine.addAbortedBlock(node, currBldrCtx->getBlock());
break;
}
case Stmt::ParenExprClass:
llvm_unreachable("ParenExprs already handled.");
case Stmt::GenericSelectionExprClass:
llvm_unreachable("GenericSelectionExprs already handled.");
// Cases that should never be evaluated simply because they shouldn't
// appear in the CFG.
case Stmt::BreakStmtClass:
case Stmt::CaseStmtClass:
case Stmt::CompoundStmtClass:
case Stmt::ContinueStmtClass:
case Stmt::CXXForRangeStmtClass:
case Stmt::DefaultStmtClass:
case Stmt::DoStmtClass:
case Stmt::ForStmtClass:
case Stmt::GotoStmtClass:
case Stmt::IfStmtClass:
case Stmt::IndirectGotoStmtClass:
case Stmt::LabelStmtClass:
case Stmt::NoStmtClass:
case Stmt::NullStmtClass:
case Stmt::SwitchStmtClass:
case Stmt::WhileStmtClass:
case Expr::MSDependentExistsStmtClass:
llvm_unreachable("Stmt should not be in analyzer evaluation loop");
case Stmt::ImplicitValueInitExprClass:
// These nodes are shared in the CFG and would cause caching out.
// Moreover, no additional evaluation is required for them; the
// analyzer can reconstruct these values from the AST.
llvm_unreachable("Should be pruned from CFG");
case Stmt::ObjCSubscriptRefExprClass:
case Stmt::ObjCPropertyRefExprClass:
llvm_unreachable("These are handled by PseudoObjectExpr");
case Stmt::GNUNullExprClass: {
// GNU __null is a pointer-width integer, not an actual pointer.
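// E.g. on an LP64 target '__null' is bound to a 64-bit integer zero rather
// than a null pointer value (illustrative).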
ProgramStateRef state = Pred->getState();
state = state->BindExpr(
S, Pred->getLocationContext(),
svalBuilder.makeIntValWithWidth(getContext().VoidPtrTy, 0));
Bldr.generateNode(S, Pred, state);
break;
}
case Stmt::ObjCAtSynchronizedStmtClass:
Bldr.takeNodes(Pred);
VisitObjCAtSynchronizedStmt(cast<ObjCAtSynchronizedStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Expr::ConstantExprClass:
case Stmt::ExprWithCleanupsClass:
// Handled due to fully linearised CFG.
break;
case Stmt::CXXBindTemporaryExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet Next;
VisitCXXBindTemporaryExpr(cast<CXXBindTemporaryExpr>(S), PreVisit, Next);
getCheckerManager().runCheckersForPostStmt(Dst, Next, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::ArrayInitLoopExprClass:
Bldr.takeNodes(Pred);
VisitArrayInitLoopExpr(cast<ArrayInitLoopExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
// Cases not handled yet; but we will handle them some day.
case Stmt::DesignatedInitExprClass:
case Stmt::DesignatedInitUpdateExprClass:
case Stmt::ArrayInitIndexExprClass:
case Stmt::ExtVectorElementExprClass:
case Stmt::ImaginaryLiteralClass:
case Stmt::ObjCAtCatchStmtClass:
case Stmt::ObjCAtFinallyStmtClass:
case Stmt::ObjCAtTryStmtClass:
case Stmt::ObjCAutoreleasePoolStmtClass:
case Stmt::ObjCEncodeExprClass:
case Stmt::ObjCIsaExprClass:
case Stmt::ObjCProtocolExprClass:
case Stmt::ObjCSelectorExprClass:
case Stmt::ParenListExprClass:
case Stmt::ShuffleVectorExprClass:
case Stmt::ConvertVectorExprClass:
case Stmt::VAArgExprClass:
case Stmt::CUDAKernelCallExprClass:
case Stmt::OpaqueValueExprClass:
case Stmt::AsTypeExprClass:
case Stmt::ConceptSpecializationExprClass:
case Stmt::CXXRewrittenBinaryOperatorClass:
case Stmt::RequiresExprClass:
case Expr::CXXParenListInitExprClass:
+ case Stmt::EmbedExprClass:
// Fall through.
// Cases we intentionally don't evaluate, since they don't need
// to be explicitly evaluated.
case Stmt::PredefinedExprClass:
case Stmt::AddrLabelExprClass:
case Stmt::AttributedStmtClass:
case Stmt::IntegerLiteralClass:
case Stmt::FixedPointLiteralClass:
case Stmt::CharacterLiteralClass:
case Stmt::CXXScalarValueInitExprClass:
case Stmt::CXXBoolLiteralExprClass:
case Stmt::ObjCBoolLiteralExprClass:
case Stmt::ObjCAvailabilityCheckExprClass:
case Stmt::FloatingLiteralClass:
case Stmt::NoInitExprClass:
case Stmt::SizeOfPackExprClass:
case Stmt::StringLiteralClass:
case Stmt::SourceLocExprClass:
case Stmt::ObjCStringLiteralClass:
case Stmt::CXXPseudoDestructorExprClass:
case Stmt::SubstNonTypeTemplateParmExprClass:
case Stmt::CXXNullPtrLiteralExprClass:
case Stmt::ArraySectionExprClass:
case Stmt::OMPArrayShapingExprClass:
case Stmt::OMPIteratorExprClass:
case Stmt::SYCLUniqueStableNameExprClass:
case Stmt::TypeTraitExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet preVisit;
getCheckerManager().runCheckersForPreStmt(preVisit, Pred, S, *this);
getCheckerManager().runCheckersForPostStmt(Dst, preVisit, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXDefaultArgExprClass:
case Stmt::CXXDefaultInitExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet Tmp;
StmtNodeBuilder Bldr2(PreVisit, Tmp, *currBldrCtx);
const Expr *ArgE;
if (const auto *DefE = dyn_cast<CXXDefaultArgExpr>(S))
ArgE = DefE->getExpr();
else if (const auto *DefE = dyn_cast<CXXDefaultInitExpr>(S))
ArgE = DefE->getExpr();
else
llvm_unreachable("unknown constant wrapper kind");
bool IsTemporary = false;
if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
ArgE = MTE->getSubExpr();
IsTemporary = true;
}
std::optional<SVal> ConstantVal = svalBuilder.getConstantVal(ArgE);
if (!ConstantVal)
ConstantVal = UnknownVal();
const LocationContext *LCtx = Pred->getLocationContext();
for (const auto I : PreVisit) {
ProgramStateRef State = I->getState();
State = State->BindExpr(S, LCtx, *ConstantVal);
if (IsTemporary)
State = createTemporaryRegionIfNeeded(State, LCtx,
cast<Expr>(S),
cast<Expr>(S));
Bldr2.generateNode(S, I, State);
}
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
Bldr.addNodes(Dst);
break;
}
// Cases we evaluate as opaque expressions, conjuring a symbol.
case Stmt::CXXStdInitializerListExprClass:
case Expr::ObjCArrayLiteralClass:
case Expr::ObjCDictionaryLiteralClass:
case Expr::ObjCBoxedExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet preVisit;
getCheckerManager().runCheckersForPreStmt(preVisit, Pred, S, *this);
ExplodedNodeSet Tmp;
StmtNodeBuilder Bldr2(preVisit, Tmp, *currBldrCtx);
const auto *Ex = cast<Expr>(S);
QualType resultType = Ex->getType();
for (const auto N : preVisit) {
const LocationContext *LCtx = N->getLocationContext();
SVal result = svalBuilder.conjureSymbolVal(nullptr, Ex, LCtx,
resultType,
currBldrCtx->blockCount());
ProgramStateRef State = N->getState()->BindExpr(Ex, LCtx, result);
// Escape pointers passed into the list, unless it's an ObjC boxed
// expression whose sub-expression is not a boxable C structure.
if (!(isa<ObjCBoxedExpr>(Ex) &&
!cast<ObjCBoxedExpr>(Ex)->getSubExpr()
->getType()->isRecordType()))
for (auto Child : Ex->children()) {
assert(Child);
SVal Val = State->getSVal(Child, LCtx);
State = escapeValues(State, Val, PSK_EscapeOther);
}
Bldr2.generateNode(S, N, State);
}
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::ArraySubscriptExprClass:
Bldr.takeNodes(Pred);
VisitArraySubscriptExpr(cast<ArraySubscriptExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::MatrixSubscriptExprClass:
llvm_unreachable("Support for MatrixSubscriptExpr is not implemented.");
break;
case Stmt::GCCAsmStmtClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet PostVisit;
for (ExplodedNode *const N : PreVisit)
VisitGCCAsmStmt(cast<GCCAsmStmt>(S), N, PostVisit);
getCheckerManager().runCheckersForPostStmt(Dst, PostVisit, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::MSAsmStmtClass:
Bldr.takeNodes(Pred);
VisitMSAsmStmt(cast<MSAsmStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::BlockExprClass:
Bldr.takeNodes(Pred);
VisitBlockExpr(cast<BlockExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::LambdaExprClass:
if (AMgr.options.ShouldInlineLambdas) {
Bldr.takeNodes(Pred);
VisitLambdaExpr(cast<LambdaExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
} else {
const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
Engine.addAbortedBlock(node, currBldrCtx->getBlock());
}
break;
case Stmt::BinaryOperatorClass: {
const auto *B = cast<BinaryOperator>(S);
if (B->isLogicalOp()) {
Bldr.takeNodes(Pred);
VisitLogicalExpr(B, Pred, Dst);
Bldr.addNodes(Dst);
break;
}
else if (B->getOpcode() == BO_Comma) {
ProgramStateRef state = Pred->getState();
Bldr.generateNode(B, Pred,
state->BindExpr(B, Pred->getLocationContext(),
state->getSVal(B->getRHS(),
Pred->getLocationContext())));
break;
}
Bldr.takeNodes(Pred);
if (AMgr.options.ShouldEagerlyAssume &&
(B->isRelationalOp() || B->isEqualityOp())) {
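// E.g. after evaluating 'x > 0' the state is eagerly split into a branch
// where the comparison is true and one where it is false (illustrative).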
ExplodedNodeSet Tmp;
VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Tmp);
evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, cast<Expr>(S));
}
else
VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXOperatorCallExprClass: {
const auto *OCE = cast<CXXOperatorCallExpr>(S);
// For instance method operators, make sure the 'this' argument has a
// valid region.
const Decl *Callee = OCE->getCalleeDecl();
if (const auto *MD = dyn_cast_or_null<CXXMethodDecl>(Callee)) {
if (MD->isImplicitObjectMemberFunction()) {
ProgramStateRef State = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
ProgramStateRef NewState =
createTemporaryRegionIfNeeded(State, LCtx, OCE->getArg(0));
if (NewState != State) {
Pred = Bldr.generateNode(OCE, Pred, NewState, /*tag=*/nullptr,
ProgramPoint::PreStmtKind);
// Did we cache out?
if (!Pred)
break;
}
}
}
[[fallthrough]];
}
case Stmt::CallExprClass:
case Stmt::CXXMemberCallExprClass:
case Stmt::UserDefinedLiteralClass:
Bldr.takeNodes(Pred);
VisitCallExpr(cast<CallExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXCatchStmtClass:
Bldr.takeNodes(Pred);
VisitCXXCatchStmt(cast<CXXCatchStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXTemporaryObjectExprClass:
case Stmt::CXXConstructExprClass:
Bldr.takeNodes(Pred);
VisitCXXConstructExpr(cast<CXXConstructExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXInheritedCtorInitExprClass:
Bldr.takeNodes(Pred);
VisitCXXInheritedCtorInitExpr(cast<CXXInheritedCtorInitExpr>(S), Pred,
Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXNewExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet PostVisit;
for (const auto i : PreVisit)
VisitCXXNewExpr(cast<CXXNewExpr>(S), i, PostVisit);
getCheckerManager().runCheckersForPostStmt(Dst, PostVisit, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXDeleteExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
const auto *CDE = cast<CXXDeleteExpr>(S);
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet PostVisit;
getCheckerManager().runCheckersForPostStmt(PostVisit, PreVisit, S, *this);
for (const auto i : PostVisit)
VisitCXXDeleteExpr(CDE, i, Dst);
Bldr.addNodes(Dst);
break;
}
// FIXME: ChooseExpr is really a constant. We need to fix
// the CFG so that it does not model them as explicit control flow.
case Stmt::ChooseExprClass: { // __builtin_choose_expr
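// E.g. '__builtin_choose_expr(1, a, b)' selects and evaluates only 'a'
// (illustrative).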
Bldr.takeNodes(Pred);
const auto *C = cast<ChooseExpr>(S);
VisitGuardedExpr(C, C->getLHS(), C->getRHS(), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::CompoundAssignOperatorClass:
Bldr.takeNodes(Pred);
VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CompoundLiteralExprClass:
Bldr.takeNodes(Pred);
VisitCompoundLiteralExpr(cast<CompoundLiteralExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::BinaryConditionalOperatorClass:
case Stmt::ConditionalOperatorClass: { // '?' operator
Bldr.takeNodes(Pred);
const auto *C = cast<AbstractConditionalOperator>(S);
VisitGuardedExpr(C, C->getTrueExpr(), C->getFalseExpr(), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXThisExprClass:
Bldr.takeNodes(Pred);
VisitCXXThisExpr(cast<CXXThisExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::DeclRefExprClass: {
Bldr.takeNodes(Pred);
const auto *DE = cast<DeclRefExpr>(S);
VisitCommonDeclRefExpr(DE, DE->getDecl(), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::DeclStmtClass:
Bldr.takeNodes(Pred);
VisitDeclStmt(cast<DeclStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ImplicitCastExprClass:
case Stmt::CStyleCastExprClass:
case Stmt::CXXStaticCastExprClass:
case Stmt::CXXDynamicCastExprClass:
case Stmt::CXXReinterpretCastExprClass:
case Stmt::CXXConstCastExprClass:
case Stmt::CXXFunctionalCastExprClass:
case Stmt::BuiltinBitCastExprClass:
case Stmt::ObjCBridgedCastExprClass:
case Stmt::CXXAddrspaceCastExprClass: {
Bldr.takeNodes(Pred);
const auto *C = cast<CastExpr>(S);
ExplodedNodeSet dstExpr;
VisitCast(C, C->getSubExpr(), Pred, dstExpr);
// Handle the postvisit checks.
getCheckerManager().runCheckersForPostStmt(Dst, dstExpr, C, *this);
Bldr.addNodes(Dst);
break;
}
case Expr::MaterializeTemporaryExprClass: {
Bldr.takeNodes(Pred);
const auto *MTE = cast<MaterializeTemporaryExpr>(S);
ExplodedNodeSet dstPrevisit;
getCheckerManager().runCheckersForPreStmt(dstPrevisit, Pred, MTE, *this);
ExplodedNodeSet dstExpr;
for (const auto i : dstPrevisit)
CreateCXXTemporaryObject(MTE, i, dstExpr);
getCheckerManager().runCheckersForPostStmt(Dst, dstExpr, MTE, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::InitListExprClass:
Bldr.takeNodes(Pred);
VisitInitListExpr(cast<InitListExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::MemberExprClass:
Bldr.takeNodes(Pred);
VisitMemberExpr(cast<MemberExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::AtomicExprClass:
Bldr.takeNodes(Pred);
VisitAtomicExpr(cast<AtomicExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCIvarRefExprClass:
Bldr.takeNodes(Pred);
VisitLvalObjCIvarRefExpr(cast<ObjCIvarRefExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCForCollectionStmtClass:
Bldr.takeNodes(Pred);
VisitObjCForCollectionStmt(cast<ObjCForCollectionStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCMessageExprClass:
Bldr.takeNodes(Pred);
VisitObjCMessage(cast<ObjCMessageExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCAtThrowStmtClass:
case Stmt::CXXThrowExprClass:
// FIXME: This is not complete. We basically treat @throw as
// an abort.
Bldr.generateSink(S, Pred, Pred->getState());
break;
case Stmt::ReturnStmtClass:
Bldr.takeNodes(Pred);
VisitReturnStmt(cast<ReturnStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::OffsetOfExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet PostVisit;
for (const auto Node : PreVisit)
VisitOffsetOfExpr(cast<OffsetOfExpr>(S), Node, PostVisit);
getCheckerManager().runCheckersForPostStmt(Dst, PostVisit, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::UnaryExprOrTypeTraitExprClass:
Bldr.takeNodes(Pred);
VisitUnaryExprOrTypeTraitExpr(cast<UnaryExprOrTypeTraitExpr>(S),
Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::StmtExprClass: {
const auto *SE = cast<StmtExpr>(S);
if (SE->getSubStmt()->body_empty()) {
// Empty statement expression.
assert(SE->getType() == getContext().VoidTy
&& "Empty statement expression must have void type.");
break;
}
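// Otherwise the statement expression takes the value of its last
// sub-expression, e.g. '({ int x = 1; x + 1; })' evaluates to 2
// (illustrative).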
if (const auto *LastExpr =
dyn_cast<Expr>(*SE->getSubStmt()->body_rbegin())) {
ProgramStateRef state = Pred->getState();
Bldr.generateNode(SE, Pred,
state->BindExpr(SE, Pred->getLocationContext(),
state->getSVal(LastExpr,
Pred->getLocationContext())));
}
break;
}
case Stmt::UnaryOperatorClass: {
Bldr.takeNodes(Pred);
const auto *U = cast<UnaryOperator>(S);
if (AMgr.options.ShouldEagerlyAssume && (U->getOpcode() == UO_LNot)) {
ExplodedNodeSet Tmp;
VisitUnaryOperator(U, Pred, Tmp);
evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, U);
}
else
VisitUnaryOperator(U, Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::PseudoObjectExprClass: {
Bldr.takeNodes(Pred);
ProgramStateRef state = Pred->getState();
const auto *PE = cast<PseudoObjectExpr>(S);
if (const Expr *Result = PE->getResultExpr()) {
SVal V = state->getSVal(Result, Pred->getLocationContext());
Bldr.generateNode(S, Pred,
state->BindExpr(S, Pred->getLocationContext(), V));
}
else
Bldr.generateNode(S, Pred,
state->BindExpr(S, Pred->getLocationContext(),
UnknownVal()));
Bldr.addNodes(Dst);
break;
}
case Expr::ObjCIndirectCopyRestoreExprClass: {
// ObjCIndirectCopyRestoreExpr implies passing a temporary for
// correctness of lifetime management. Due to the limited analysis
// of ARC, this is implemented as direct argument passing.
Bldr.takeNodes(Pred);
ProgramStateRef state = Pred->getState();
const auto *OIE = cast<ObjCIndirectCopyRestoreExpr>(S);
const Expr *E = OIE->getSubExpr();
SVal V = state->getSVal(E, Pred->getLocationContext());
Bldr.generateNode(S, Pred,
state->BindExpr(S, Pred->getLocationContext(), V));
Bldr.addNodes(Dst);
break;
}
-
- case Stmt::EmbedExprClass:
- llvm::report_fatal_error("Support for EmbedExpr is not implemented.");
- break;
}
}
bool ExprEngine::replayWithoutInlining(ExplodedNode *N,
const LocationContext *CalleeLC) {
const StackFrameContext *CalleeSF = CalleeLC->getStackFrame();
const StackFrameContext *CallerSF = CalleeSF->getParent()->getStackFrame();
assert(CalleeSF && CallerSF);
ExplodedNode *BeforeProcessingCall = nullptr;
const Stmt *CE = CalleeSF->getCallSite();
// Find the first node before we started processing the call expression.
while (N) {
ProgramPoint L = N->getLocation();
BeforeProcessingCall = N;
N = N->pred_empty() ? nullptr : *(N->pred_begin());
// Skip the nodes corresponding to the inlined code.
if (L.getStackFrame() != CallerSF)
continue;
// We reached the caller. Find the node right before we started
// processing the call.
if (L.isPurgeKind())
continue;
if (L.getAs<PreImplicitCall>())
continue;
if (L.getAs<CallEnter>())
continue;
if (std::optional<StmtPoint> SP = L.getAs<StmtPoint>())
if (SP->getStmt() == CE)
continue;
break;
}
if (!BeforeProcessingCall)
return false;
// TODO: Clean up the unneeded nodes.
// Build an Epsilon node from which we will restart the analysis.
// Note that CE is permitted to be NULL!
static SimpleProgramPointTag PT("ExprEngine", "Replay without inlining");
ProgramPoint NewNodeLoc = EpsilonPoint(
BeforeProcessingCall->getLocationContext(), CE, nullptr, &PT);
// Add the special flag to GDM to signal retrying with no inlining.
// Note, changing the state ensures that we are not going to cache out.
ProgramStateRef NewNodeState = BeforeProcessingCall->getState();
NewNodeState =
NewNodeState->set<ReplayWithoutInlining>(const_cast<Stmt *>(CE));
// Make the new node a successor of BeforeProcessingCall.
bool IsNew = false;
ExplodedNode *NewNode = G.getNode(NewNodeLoc, NewNodeState, false, &IsNew);
// We cached out at this point. Caching out is common due to us backtracking
// from the inlined function, which might spawn several paths.
if (!IsNew)
return true;
NewNode->addPredecessor(BeforeProcessingCall, G);
// Add the new node to the work list.
Engine.enqueueStmtNode(NewNode, CalleeSF->getCallSiteBlock(),
CalleeSF->getIndex());
NumTimesRetriedWithoutInlining++;
return true;
}
/// Block entrance. (Update counters).
void ExprEngine::processCFGBlockEntrance(const BlockEdge &L,
NodeBuilderWithSinks &nodeBuilder,
ExplodedNode *Pred) {
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
// If we reach a loop which has a known bound (and meets
// other constraints) then consider completely unrolling it.
if(AMgr.options.ShouldUnrollLoops) {
unsigned maxBlockVisitOnPath = AMgr.options.maxBlockVisitOnPath;
const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt();
if (Term) {
ProgramStateRef NewState = updateLoopStack(Term, AMgr.getASTContext(),
Pred, maxBlockVisitOnPath);
if (NewState != Pred->getState()) {
ExplodedNode *UpdatedNode = nodeBuilder.generateNode(NewState, Pred);
if (!UpdatedNode)
return;
Pred = UpdatedNode;
}
}
// If we are inside an unrolled loop then there is no need to check the counters.
if(isUnrolledState(Pred->getState()))
return;
}
// If this block is terminated by a loop and it has already been visited the
// maximum number of times, widen the loop.
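// Roughly: instead of giving up on e.g. 'while (x < N) { ... }' once the
// visit budget is spent, conservatively invalidate what the loop body may
// modify and continue past the loop (illustrative summary of the widening).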
unsigned int BlockCount = nodeBuilder.getContext().blockCount();
if (BlockCount == AMgr.options.maxBlockVisitOnPath - 1 &&
AMgr.options.ShouldWidenLoops) {
const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt();
if (!isa_and_nonnull<ForStmt, WhileStmt, DoStmt, CXXForRangeStmt>(Term))
return;
// Widen.
const LocationContext *LCtx = Pred->getLocationContext();
ProgramStateRef WidenedState =
getWidenedLoopState(Pred->getState(), LCtx, BlockCount, Term);
nodeBuilder.generateNode(WidenedState, Pred);
return;
}
// FIXME: Refactor this into a checker.
if (BlockCount >= AMgr.options.maxBlockVisitOnPath) {
static SimpleProgramPointTag tag(TagProviderName, "Block count exceeded");
const ExplodedNode *Sink =
nodeBuilder.generateSink(Pred->getState(), Pred, &tag);
// Check if we stopped at the top-level function or not.
// The root node should have the location context of the topmost function.
const LocationContext *CalleeLC = Pred->getLocation().getLocationContext();
const LocationContext *CalleeSF = CalleeLC->getStackFrame();
const LocationContext *RootLC =
(*G.roots_begin())->getLocation().getLocationContext();
if (RootLC->getStackFrame() != CalleeSF) {
Engine.FunctionSummaries->markReachedMaxBlockCount(CalleeSF->getDecl());
// Re-run the call evaluation without inlining it, by storing the
// no-inlining policy in the state and enqueuing the new work item on
// the list. Replay should almost never fail. Use the stats to catch it
// if it does.
if ((!AMgr.options.NoRetryExhausted &&
replayWithoutInlining(Pred, CalleeLC)))
return;
NumMaxBlockCountReachedInInlined++;
} else
NumMaxBlockCountReached++;
// Mark sink nodes as exhausted (for stats) only if the retry failed.
Engine.blocksExhausted.push_back(std::make_pair(L, Sink));
}
}
//===----------------------------------------------------------------------===//
// Branch processing.
//===----------------------------------------------------------------------===//
/// RecoverCastedSymbol - A helper function for ProcessBranch that is used
/// to try to recover some path-sensitivity for casts of symbolic
/// integers that promote their values (which are currently not tracked well).
/// This function returns the SVal bound to Condition->IgnoreCasts if all the
/// cast(s) did was sign-extend the original value.
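/// E.g. for 'char c; ... if ((int)c) ...' the cast only widens 'c', so the
/// value bound to 'c' itself is returned (illustrative).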
static SVal RecoverCastedSymbol(ProgramStateRef state,
const Stmt *Condition,
const LocationContext *LCtx,
ASTContext &Ctx) {
const auto *Ex = dyn_cast<Expr>(Condition);
if (!Ex)
return UnknownVal();
uint64_t bits = 0;
bool bitsInit = false;
while (const auto *CE = dyn_cast<CastExpr>(Ex)) {
QualType T = CE->getType();
if (!T->isIntegralOrEnumerationType())
return UnknownVal();
uint64_t newBits = Ctx.getTypeSize(T);
if (!bitsInit || newBits < bits) {
bitsInit = true;
bits = newBits;
}
Ex = CE->getSubExpr();
}
// We reached a non-cast. Is it a symbolic value?
QualType T = Ex->getType();
if (!bitsInit || !T->isIntegralOrEnumerationType() ||
Ctx.getTypeSize(T) > bits)
return UnknownVal();
return state->getSVal(Ex, LCtx);
}
#ifndef NDEBUG
static const Stmt *getRightmostLeaf(const Stmt *Condition) {
while (Condition) {
const auto *BO = dyn_cast<BinaryOperator>(Condition);
if (!BO || !BO->isLogicalOp()) {
return Condition;
}
Condition = BO->getRHS()->IgnoreParens();
}
return nullptr;
}
#endif
// Returns the condition the branch at the end of 'B' depends on and whose value
// has been evaluated within 'B'.
// In most cases, the terminator condition of 'B' will be evaluated fully in
// the last statement of 'B'; in those cases, the resolved condition is the
// given 'Condition'.
// If the condition of the branch is a logical binary operator tree, the CFG is
// optimized: in that case, we know that the expression formed by all but the
// rightmost leaf of the logical binary operator tree must be true, and thus
// the branch condition is at this point equivalent to the truth value of that
// rightmost leaf; the CFG block thus only evaluates this rightmost leaf
// expression in its final statement. As the full condition in that case was
// not evaluated, and is thus not in the SVal cache, we need to use that leaf
// expression to evaluate the truth value of the condition in the current state
// space.
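// For example, for `if (a && b)` the optimized CFG has already branched on
// 'a', so the block ending in the branch evaluates only the rightmost leaf
// 'b'; the resolved condition is 'b' rather than the full `a && b`.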
static const Stmt *ResolveCondition(const Stmt *Condition,
const CFGBlock *B) {
if (const auto *Ex = dyn_cast<Expr>(Condition))
Condition = Ex->IgnoreParens();
const auto *BO = dyn_cast<BinaryOperator>(Condition);
if (!BO || !BO->isLogicalOp())
return Condition;
assert(B->getTerminator().isStmtBranch() &&
"Other kinds of branches are handled separately!");
// For logical operations, we still have the case where some branches
// use the traditional "merge" approach and others sink the branch
// directly into the basic blocks representing the logical operation.
// We need to distinguish between those two cases here.
// The invariants are still shifting, but it is possible that the
// last element in a CFGBlock is not a CFGStmt. Look for the last
// CFGStmt as the value of the condition.
for (CFGElement Elem : llvm::reverse(*B)) {
std::optional<CFGStmt> CS = Elem.getAs<CFGStmt>();
if (!CS)
continue;
const Stmt *LastStmt = CS->getStmt();
assert(LastStmt == Condition || LastStmt == getRightmostLeaf(Condition));
return LastStmt;
}
llvm_unreachable("could not resolve condition");
}
using ObjCForLctxPair =
std::pair<const ObjCForCollectionStmt *, const LocationContext *>;
REGISTER_MAP_WITH_PROGRAMSTATE(ObjCForHasMoreIterations, ObjCForLctxPair, bool)
ProgramStateRef ExprEngine::setWhetherHasMoreIteration(
ProgramStateRef State, const ObjCForCollectionStmt *O,
const LocationContext *LC, bool HasMoreIteration) {
assert(!State->contains<ObjCForHasMoreIterations>({O, LC}));
return State->set<ObjCForHasMoreIterations>({O, LC}, HasMoreIteration);
}
ProgramStateRef
ExprEngine::removeIterationState(ProgramStateRef State,
const ObjCForCollectionStmt *O,
const LocationContext *LC) {
assert(State->contains<ObjCForHasMoreIterations>({O, LC}));
return State->remove<ObjCForHasMoreIterations>({O, LC});
}
bool ExprEngine::hasMoreIteration(ProgramStateRef State,
const ObjCForCollectionStmt *O,
const LocationContext *LC) {
assert(State->contains<ObjCForHasMoreIterations>({O, LC}));
return *State->get<ObjCForHasMoreIterations>({O, LC});
}
/// Split the state on whether there are any more iterations left for this loop.
/// Returns a (HasMoreIteration, HasNoMoreIteration) pair, or std::nullopt when
/// the acquisition of the loop condition value failed.
static std::optional<std::pair<ProgramStateRef, ProgramStateRef>>
assumeCondition(const Stmt *Condition, ExplodedNode *N) {
ProgramStateRef State = N->getState();
if (const auto *ObjCFor = dyn_cast<ObjCForCollectionStmt>(Condition)) {
bool HasMoreIteration =
ExprEngine::hasMoreIteration(State, ObjCFor, N->getLocationContext());
// Checkers have already run on branch conditions, so the current
// information as to whether the loop has more iterations becomes outdated
// after this point.
State = ExprEngine::removeIterationState(State, ObjCFor,
N->getLocationContext());
if (HasMoreIteration)
return std::pair<ProgramStateRef, ProgramStateRef>{State, nullptr};
else
return std::pair<ProgramStateRef, ProgramStateRef>{nullptr, State};
}
SVal X = State->getSVal(Condition, N->getLocationContext());
if (X.isUnknownOrUndef()) {
// Give it a chance to recover from unknown.
if (const auto *Ex = dyn_cast<Expr>(Condition)) {
if (Ex->getType()->isIntegralOrEnumerationType()) {
// Try to recover some path-sensitivity. Right now casts of symbolic
// integers that promote their values are currently not tracked well.
// If 'Condition' is such an expression, try and recover the
// underlying value and use that instead.
SVal recovered =
RecoverCastedSymbol(State, Condition, N->getLocationContext(),
N->getState()->getStateManager().getContext());
if (!recovered.isUnknown()) {
X = recovered;
}
}
}
}
// If the condition is still unknown, give up.
if (X.isUnknownOrUndef())
return std::nullopt;
DefinedSVal V = X.castAs<DefinedSVal>();
return State->assume(V);
}
void ExprEngine::processBranch(const Stmt *Condition,
NodeBuilderContext& BldCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF) {
assert((!Condition || !isa<CXXBindTemporaryExpr>(Condition)) &&
"CXXBindTemporaryExprs are handled by processBindTemporary.");
const LocationContext *LCtx = Pred->getLocationContext();
PrettyStackTraceLocationContext StackCrashInfo(LCtx);
currBldrCtx = &BldCtx;
// Check for NULL conditions; e.g. "for(;;)"
if (!Condition) {
BranchNodeBuilder NullCondBldr(Pred, Dst, BldCtx, DstT, DstF);
NullCondBldr.markInfeasible(false);
NullCondBldr.generateNode(Pred->getState(), true, Pred);
return;
}
if (const auto *Ex = dyn_cast<Expr>(Condition))
Condition = Ex->IgnoreParens();
Condition = ResolveCondition(Condition, BldCtx.getBlock());
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
Condition->getBeginLoc(),
"Error evaluating branch");
ExplodedNodeSet CheckersOutSet;
getCheckerManager().runCheckersForBranchCondition(Condition, CheckersOutSet,
Pred, *this);
// We generated only sinks.
if (CheckersOutSet.empty())
return;
BranchNodeBuilder builder(CheckersOutSet, Dst, BldCtx, DstT, DstF);
for (ExplodedNode *PredN : CheckersOutSet) {
if (PredN->isSink())
continue;
ProgramStateRef PrevState = PredN->getState();
ProgramStateRef StTrue, StFalse;
if (const auto KnownCondValueAssumption = assumeCondition(Condition, PredN))
std::tie(StTrue, StFalse) = *KnownCondValueAssumption;
else {
assert(!isa<ObjCForCollectionStmt>(Condition));
builder.generateNode(PrevState, true, PredN);
builder.generateNode(PrevState, false, PredN);
continue;
}
if (StTrue && StFalse)
assert(!isa<ObjCForCollectionStmt>(Condition));
// Process the true branch.
if (builder.isFeasible(true)) {
if (StTrue)
builder.generateNode(StTrue, true, PredN);
else
builder.markInfeasible(true);
}
// Process the false branch.
if (builder.isFeasible(false)) {
if (StFalse)
builder.generateNode(StFalse, false, PredN);
else
builder.markInfeasible(false);
}
}
currBldrCtx = nullptr;
}
/// The GDM component containing the set of global variables which have been
/// previously initialized with explicit initializers.
REGISTER_TRAIT_WITH_PROGRAMSTATE(InitializedGlobalsSet,
llvm::ImmutableSet<const VarDecl *>)
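// For example (illustrative user code; 'compute' is hypothetical):
//   static int x = compute();
// The first path through the enclosing function takes the initialization
// branch and records 'x' in the set; paths that reach the declaration again
// skip the initializer.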
void ExprEngine::processStaticInitializer(const DeclStmt *DS,
NodeBuilderContext &BuilderCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF) {
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
currBldrCtx = &BuilderCtx;
const auto *VD = cast<VarDecl>(DS->getSingleDecl());
ProgramStateRef state = Pred->getState();
bool initHasRun = state->contains<InitializedGlobalsSet>(VD);
BranchNodeBuilder builder(Pred, Dst, BuilderCtx, DstT, DstF);
if (!initHasRun) {
state = state->add<InitializedGlobalsSet>(VD);
}
builder.generateNode(state, initHasRun, Pred);
builder.markInfeasible(!initHasRun);
currBldrCtx = nullptr;
}
/// processIndirectGoto - Called by CoreEngine. Used to generate successor
/// nodes by processing the 'effects' of a computed goto jump.
void ExprEngine::processIndirectGoto(IndirectGotoNodeBuilder &builder) {
ProgramStateRef state = builder.getState();
SVal V = state->getSVal(builder.getTarget(), builder.getLocationContext());
// Three possibilities:
//
// (1) We know the computed label.
// (2) The label is NULL (or some other constant), or Undefined.
// (3) We have no clue about the label. Dispatch to all targets.
//
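// For example (GNU C computed goto):
//   void *target = &&out; ... goto *target;  // (1) label is known
//   goto *(void *)0;                         // (2) constant/undefined target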
using iterator = IndirectGotoNodeBuilder::iterator;
if (std::optional<loc::GotoLabel> LV = V.getAs<loc::GotoLabel>()) {
const LabelDecl *L = LV->getLabel();
for (iterator Succ : builder) {
if (Succ.getLabel() == L) {
builder.generateNode(Succ, state);
return;
}
}
llvm_unreachable("No block with label.");
}
if (isa<UndefinedVal, loc::ConcreteInt>(V)) {
// Dispatch to the first target and mark it as a sink.
//ExplodedNode* N = builder.generateNode(builder.begin(), state, true);
// FIXME: add checker visit.
// UndefBranches.insert(N);
return;
}
// This is really a catch-all. We don't support symbolics yet.
// FIXME: Implement dispatch for symbolic pointers.
for (iterator Succ : builder)
builder.generateNode(Succ, state);
}
void ExprEngine::processBeginOfFunction(NodeBuilderContext &BC,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const BlockEdge &L) {
SaveAndRestore<const NodeBuilderContext *> NodeContextRAII(currBldrCtx, &BC);
getCheckerManager().runCheckersForBeginFunction(Dst, L, Pred, *this);
}
/// ProcessEndPath - Called by CoreEngine. Used to generate end-of-path
/// nodes when the control reaches the end of a function.
void ExprEngine::processEndOfFunction(NodeBuilderContext& BC,
ExplodedNode *Pred,
const ReturnStmt *RS) {
ProgramStateRef State = Pred->getState();
if (!Pred->getStackFrame()->inTopFrame())
State = finishArgumentConstruction(
State, *getStateManager().getCallEventManager().getCaller(
Pred->getStackFrame(), Pred->getState()));
// FIXME: We currently cannot assert that temporaries are clear, because
// lifetime extended temporaries are not always modelled correctly. In some
// cases when we materialize the temporary, we do
// createTemporaryRegionIfNeeded(), and the region changes, and also the
// respective destructor changes from a temporary destructor to an automatic
// one. So for now, clean up the state manually before asserting. Ideally,
// this braced block of code should go away.
{
const LocationContext *FromLC = Pred->getLocationContext();
const LocationContext *ToLC = FromLC->getStackFrame()->getParent();
const LocationContext *LC = FromLC;
while (LC != ToLC) {
assert(LC && "ToLC must be a parent of FromLC!");
for (auto I : State->get<ObjectsUnderConstruction>())
if (I.first.getLocationContext() == LC) {
// The comment above only pardons us for not cleaning up a
// temporary destructor. If any other statements are found here,
// it must be a separate problem.
assert(I.first.getItem().getKind() ==
ConstructionContextItem::TemporaryDestructorKind ||
I.first.getItem().getKind() ==
ConstructionContextItem::ElidedDestructorKind);
State = State->remove<ObjectsUnderConstruction>(I.first);
}
LC = LC->getParent();
}
}
// Perform the transition with cleanups.
if (State != Pred->getState()) {
ExplodedNodeSet PostCleanup;
NodeBuilder Bldr(Pred, PostCleanup, BC);
Pred = Bldr.generateNode(Pred->getLocation(), State, Pred);
if (!Pred) {
// The node with clean temporaries already exists. We might have reached
// it on a path on which we initialize different temporaries.
return;
}
}
assert(areAllObjectsFullyConstructed(Pred->getState(),
Pred->getLocationContext(),
Pred->getStackFrame()->getParent()));
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
ExplodedNodeSet Dst;
if (Pred->getLocationContext()->inTopFrame()) {
// Remove dead symbols.
ExplodedNodeSet AfterRemovedDead;
removeDeadOnEndOfFunction(BC, Pred, AfterRemovedDead);
// Notify checkers.
for (const auto I : AfterRemovedDead)
getCheckerManager().runCheckersForEndFunction(BC, Dst, I, *this, RS);
} else {
getCheckerManager().runCheckersForEndFunction(BC, Dst, Pred, *this, RS);
}
Engine.enqueueEndOfFunction(Dst, RS);
}
/// ProcessSwitch - Called by CoreEngine. Used to generate successor
/// nodes by processing the 'effects' of a switch statement.
void ExprEngine::processSwitch(SwitchNodeBuilder& builder) {
using iterator = SwitchNodeBuilder::iterator;
ProgramStateRef state = builder.getState();
const Expr *CondE = builder.getCondition();
SVal CondV_untested = state->getSVal(CondE, builder.getLocationContext());
if (CondV_untested.isUndef()) {
//ExplodedNode* N = builder.generateDefaultCaseNode(state, true);
// FIXME: add checker
//UndefBranches.insert(N);
return;
}
DefinedOrUnknownSVal CondV = CondV_untested.castAs<DefinedOrUnknownSVal>();
ProgramStateRef DefaultSt = state;
iterator I = builder.begin(), EI = builder.end();
bool defaultIsFeasible = I == EI;
for ( ; I != EI; ++I) {
// Successor may be pruned out during CFG construction.
if (!I.getBlock())
continue;
const CaseStmt *Case = I.getCase();
// Evaluate the LHS of the case value.
llvm::APSInt V1 = Case->getLHS()->EvaluateKnownConstInt(getContext());
assert(V1.getBitWidth() == getContext().getIntWidth(CondE->getType()));
// Get the RHS of the case, if it exists.
llvm::APSInt V2;
if (const Expr *E = Case->getRHS())
V2 = E->EvaluateKnownConstInt(getContext());
else
V2 = V1;
ProgramStateRef StateCase;
if (std::optional<NonLoc> NL = CondV.getAs<NonLoc>())
std::tie(StateCase, DefaultSt) =
DefaultSt->assumeInclusiveRange(*NL, V1, V2);
else // UnknownVal
StateCase = DefaultSt;
if (StateCase)
builder.generateCaseStmtNode(I, StateCase);
// Now "assume" that the case doesn't match. Add this state
// to the default state (if it is feasible).
if (DefaultSt)
defaultIsFeasible = true;
else {
defaultIsFeasible = false;
break;
}
}
if (!defaultIsFeasible)
return;
// If we have switch(enum value), the default branch is not
// feasible if all of the enum constants not covered by 'case:' statements
// are not feasible values for the switch condition.
//
// Note that this isn't as accurate as it could be. Even if there isn't
// a case for a particular enum value, as long as that enum value isn't
// feasible it shouldn't be considered for making 'default:' reachable.
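// For example, given
//   enum E { A, B } e = ...;
//   switch (e) { case A: ...; case B: ...; }
// all enum cases are covered, so the 'default' branch is not generated.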
const SwitchStmt *SS = builder.getSwitch();
const Expr *CondExpr = SS->getCond()->IgnoreParenImpCasts();
if (CondExpr->getType()->getAs<EnumType>()) {
if (SS->isAllEnumCasesCovered())
return;
}
builder.generateDefaultCaseNode(DefaultSt);
}
//===----------------------------------------------------------------------===//
// Transfer functions: Loads and stores.
//===----------------------------------------------------------------------===//
void ExprEngine::VisitCommonDeclRefExpr(const Expr *Ex, const NamedDecl *D,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
ProgramStateRef state = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
if (const auto *VD = dyn_cast<VarDecl>(D)) {
// C permits "extern void v", and if you cast the address to a valid type,
// you can even do things with it. We simply pretend the expression is an
// ordinary glvalue.
assert(Ex->isGLValue() || VD->getType()->isVoidType());
const LocationContext *LocCtxt = Pred->getLocationContext();
const Decl *D = LocCtxt->getDecl();
const auto *MD = dyn_cast_or_null<CXXMethodDecl>(D);
const auto *DeclRefEx = dyn_cast<DeclRefExpr>(Ex);
std::optional<std::pair<SVal, QualType>> VInfo;
if (AMgr.options.ShouldInlineLambdas && DeclRefEx &&
DeclRefEx->refersToEnclosingVariableOrCapture() && MD &&
MD->getParent()->isLambda()) {
// Lookup the field of the lambda.
const CXXRecordDecl *CXXRec = MD->getParent();
llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField;
CXXRec->getCaptureFields(LambdaCaptureFields, LambdaThisCaptureField);
// Sema follows a sequence of complex rules to determine whether the
// variable should be captured.
if (const FieldDecl *FD = LambdaCaptureFields[VD]) {
Loc CXXThis =
svalBuilder.getCXXThis(MD, LocCtxt->getStackFrame());
SVal CXXThisVal = state->getSVal(CXXThis);
VInfo = std::make_pair(state->getLValue(FD, CXXThisVal), FD->getType());
}
}
if (!VInfo)
VInfo = std::make_pair(state->getLValue(VD, LocCtxt), VD->getType());
SVal V = VInfo->first;
bool IsReference = VInfo->second->isReferenceType();
// For references, the 'lvalue' is the pointer address stored in the
// reference region.
if (IsReference) {
if (const MemRegion *R = V.getAsRegion())
V = state->getSVal(R);
else
V = UnknownVal();
}
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
return;
}
if (const auto *ED = dyn_cast<EnumConstantDecl>(D)) {
assert(!Ex->isGLValue());
SVal V = svalBuilder.makeIntVal(ED->getInitVal());
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V));
return;
}
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
SVal V = svalBuilder.getFunctionPointer(FD);
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
return;
}
if (isa<FieldDecl, IndirectFieldDecl>(D)) {
// Delegate all work related to pointer to members to the surrounding
// operator&.
return;
}
if (const auto *BD = dyn_cast<BindingDecl>(D)) {
const auto *DD = cast<DecompositionDecl>(BD->getDecomposedDecl());
SVal Base = state->getLValue(DD, LCtx);
if (DD->getType()->isReferenceType()) {
if (const MemRegion *R = Base.getAsRegion())
Base = state->getSVal(R);
else
Base = UnknownVal();
}
SVal V = UnknownVal();
// Handle binding to data members
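// For example: `struct P { int x, y; }; P p; auto [a, b] = p;`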
if (const auto *ME = dyn_cast<MemberExpr>(BD->getBinding())) {
const auto *Field = cast<FieldDecl>(ME->getMemberDecl());
V = state->getLValue(Field, Base);
}
// Handle binding to arrays
else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(BD->getBinding())) {
SVal Idx = state->getSVal(ASE->getIdx(), LCtx);
// Note: the index of an element in a structured binding is automatically
// created and it is a unique identifier of the specific element. Thus it
// cannot be a value that varies at runtime.
assert(Idx.isConstant() && "BindingDecl array index is not a constant!");
V = state->getLValue(BD->getType(), Idx, Base);
}
// Handle binding to tuple-like structures
else if (const auto *HV = BD->getHoldingVar()) {
V = state->getLValue(HV, LCtx);
if (HV->getType()->isReferenceType()) {
if (const MemRegion *R = V.getAsRegion())
V = state->getSVal(R);
else
V = UnknownVal();
}
} else
llvm_unreachable("An unknown case of structured binding encountered!");
// In case of tuple-like types the references are already handled, so we
// don't want to handle them again.
if (BD->getType()->isReferenceType() && !BD->getHoldingVar()) {
if (const MemRegion *R = V.getAsRegion())
V = state->getSVal(R);
else
V = UnknownVal();
}
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
return;
}
if (const auto *TPO = dyn_cast<TemplateParamObjectDecl>(D)) {
// FIXME: We should meaningfully implement this.
(void)TPO;
return;
}
llvm_unreachable("Support for this Decl not implemented.");
}
/// VisitArrayInitLoopExpr - Transfer function for array init loop.
void ExprEngine::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *Ex,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
ExplodedNodeSet CheckerPreStmt;
getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, Ex, *this);
ExplodedNodeSet EvalSet;
StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
const Expr *Arr = Ex->getCommonExpr()->getSourceExpr();
for (auto *Node : CheckerPreStmt) {
// The constructor visitor has already taken care of everything.
if (isa<CXXConstructExpr>(Ex->getSubExpr()))
break;
const LocationContext *LCtx = Node->getLocationContext();
ProgramStateRef state = Node->getState();
SVal Base = UnknownVal();
// As the sub-expressions of this expression are not visited by any other
// transfer function, they are handled by matching their AST.
// Case of implicit copy or move ctor of object with array member
//
// Note: ExprEngine::VisitMemberExpr is not able to bind the array to the
// environment.
//
// struct S {
// int arr[2];
// };
//
//
// S a;
// S b = a;
//
// The AST in case of a *copy constructor* looks like this:
// ArrayInitLoopExpr
// |-OpaqueValueExpr
// | `-MemberExpr <-- match this
// | `-DeclRefExpr
// ` ...
//
//
// S c;
// S d = std::move(c);
//
// In case of a *move constructor* the resulting AST looks like:
// ArrayInitLoopExpr
// |-OpaqueValueExpr
// | `-MemberExpr <-- match this first
// | `-CXXStaticCastExpr <-- match this after
// | `-DeclRefExpr
// ` ...
if (const auto *ME = dyn_cast<MemberExpr>(Arr)) {
Expr *MEBase = ME->getBase();
// Move ctor
if (auto CXXSCE = dyn_cast<CXXStaticCastExpr>(MEBase)) {
MEBase = CXXSCE->getSubExpr();
}
auto ObjDeclExpr = cast<DeclRefExpr>(MEBase);
SVal Obj = state->getLValue(cast<VarDecl>(ObjDeclExpr->getDecl()), LCtx);
Base = state->getLValue(cast<FieldDecl>(ME->getMemberDecl()), Obj);
}
// Case of lambda capture and decomposition declaration
//
// int arr[2];
//
// [arr]{ int a = arr[0]; }();
// auto[a, b] = arr;
//
// In both of these cases the AST looks like the following:
// ArrayInitLoopExpr
// |-OpaqueValueExpr
// | `-DeclRefExpr <-- match this
// ` ...
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Arr))
Base = state->getLValue(cast<VarDecl>(DRE->getDecl()), LCtx);
// Create a lazy compound value from the original array
if (const MemRegion *R = Base.getAsRegion())
Base = state->getSVal(R);
else
Base = UnknownVal();
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, Base));
}
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, Ex, *this);
}
/// VisitArraySubscriptExpr - Transfer function for array accesses
void ExprEngine::VisitArraySubscriptExpr(const ArraySubscriptExpr *A,
ExplodedNode *Pred,
ExplodedNodeSet &Dst){
const Expr *Base = A->getBase()->IgnoreParens();
const Expr *Idx = A->getIdx()->IgnoreParens();
ExplodedNodeSet CheckerPreStmt;
getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, A, *this);
ExplodedNodeSet EvalSet;
StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
bool IsVectorType = A->getBase()->getType()->isVectorType();
// The "like" case is for situations where C standard prohibits the type to
// be an lvalue, e.g. taking the address of a subscript of an expression of
// type "void *".
bool IsGLValueLike = A->isGLValue() ||
(A->getType().isCForbiddenLValueType() && !AMgr.getLangOpts().CPlusPlus);
for (auto *Node : CheckerPreStmt) {
const LocationContext *LCtx = Node->getLocationContext();
ProgramStateRef state = Node->getState();
if (IsGLValueLike) {
QualType T = A->getType();
// One of the forbidden LValue types! We still need to have sensible
// symbolic locations to represent this stuff. Note that arithmetic on
// void pointers is a GCC extension.
if (T->isVoidType())
T = getContext().CharTy;
SVal V = state->getLValue(T,
state->getSVal(Idx, LCtx),
state->getSVal(Base, LCtx));
Bldr.generateNode(A, Node, state->BindExpr(A, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
} else if (IsVectorType) {
// FIXME: non-glvalue vector reads are not modelled.
Bldr.generateNode(A, Node, state, nullptr);
} else {
llvm_unreachable("Array subscript should be an lValue when not \
a vector and not a forbidden lvalue type");
}
}
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, A, *this);
}
/// VisitMemberExpr - Transfer function for member expressions.
void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
// FIXME: Prechecks eventually go in ::Visit().
ExplodedNodeSet CheckedSet;
getCheckerManager().runCheckersForPreStmt(CheckedSet, Pred, M, *this);
ExplodedNodeSet EvalSet;
ValueDecl *Member = M->getMemberDecl();
// Handle static member variables and enum constants accessed via
// member syntax.
if (isa<VarDecl, EnumConstantDecl>(Member)) {
for (const auto I : CheckedSet)
VisitCommonDeclRefExpr(M, Member, I, EvalSet);
} else {
StmtNodeBuilder Bldr(CheckedSet, EvalSet, *currBldrCtx);
ExplodedNodeSet Tmp;
for (const auto I : CheckedSet) {
ProgramStateRef state = I->getState();
const LocationContext *LCtx = I->getLocationContext();
Expr *BaseExpr = M->getBase();
// Handle C++ method calls.
if (const auto *MD = dyn_cast<CXXMethodDecl>(Member)) {
if (MD->isImplicitObjectMemberFunction())
state = createTemporaryRegionIfNeeded(state, LCtx, BaseExpr);
SVal MDVal = svalBuilder.getFunctionPointer(MD);
state = state->BindExpr(M, LCtx, MDVal);
Bldr.generateNode(M, I, state);
continue;
}
// Handle regular struct fields / member variables.
const SubRegion *MR = nullptr;
state = createTemporaryRegionIfNeeded(state, LCtx, BaseExpr,
/*Result=*/nullptr,
/*OutRegionWithAdjustments=*/&MR);
SVal baseExprVal =
MR ? loc::MemRegionVal(MR) : state->getSVal(BaseExpr, LCtx);
// FIXME: Copied from RegionStoreManager::bind()
if (const auto *SR =
dyn_cast_or_null<SymbolicRegion>(baseExprVal.getAsRegion())) {
QualType T = SR->getPointeeStaticType();
baseExprVal =
loc::MemRegionVal(getStoreManager().GetElementZeroRegion(SR, T));
}
const auto *field = cast<FieldDecl>(Member);
SVal L = state->getLValue(field, baseExprVal);
if (M->isGLValue() || M->getType()->isArrayType()) {
// We special-case rvalues of array type because the analyzer cannot
// reason about them, since we expect all regions to be wrapped in Locs.
// We instead treat these as lvalues and assume that they will decay to
// pointers as soon as they are used.
if (!M->isGLValue()) {
assert(M->getType()->isArrayType());
const auto *PE =
dyn_cast<ImplicitCastExpr>(I->getParentMap().getParentIgnoreParens(M));
if (!PE || PE->getCastKind() != CK_ArrayToPointerDecay) {
llvm_unreachable("should always be wrapped in ArrayToPointerDecay");
}
}
if (field->getType()->isReferenceType()) {
if (const MemRegion *R = L.getAsRegion())
L = state->getSVal(R);
else
L = UnknownVal();
}
Bldr.generateNode(M, I, state->BindExpr(M, LCtx, L), nullptr,
ProgramPoint::PostLValueKind);
} else {
Bldr.takeNodes(I);
evalLoad(Tmp, M, M, I, state, L);
Bldr.addNodes(Tmp);
}
}
}
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, M, *this);
}
void ExprEngine::VisitAtomicExpr(const AtomicExpr *AE, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
ExplodedNodeSet AfterPreSet;
getCheckerManager().runCheckersForPreStmt(AfterPreSet, Pred, AE, *this);
// For now, treat all the arguments to C11 atomics as escaping.
// FIXME: Ideally we should model the behavior of the atomics precisely here.
ExplodedNodeSet AfterInvalidateSet;
StmtNodeBuilder Bldr(AfterPreSet, AfterInvalidateSet, *currBldrCtx);
for (const auto I : AfterPreSet) {
ProgramStateRef State = I->getState();
const LocationContext *LCtx = I->getLocationContext();
SmallVector<SVal, 8> ValuesToInvalidate;
for (unsigned SI = 0, Count = AE->getNumSubExprs(); SI != Count; SI++) {
const Expr *SubExpr = AE->getSubExprs()[SI];
SVal SubExprVal = State->getSVal(SubExpr, LCtx);
ValuesToInvalidate.push_back(SubExprVal);
}
State = State->invalidateRegions(ValuesToInvalidate, AE,
currBldrCtx->blockCount(),
LCtx,
/*CausedByPointerEscape*/true,
/*Symbols=*/nullptr);
SVal ResultVal = UnknownVal();
State = State->BindExpr(AE, LCtx, ResultVal);
Bldr.generateNode(AE, I, State, nullptr,
ProgramPoint::PostStmtKind);
}
getCheckerManager().runCheckersForPostStmt(Dst, AfterInvalidateSet, AE, *this);
}
// A value escapes in four possible cases:
// (1) We are binding to something that is not a memory region.
// (2) We are binding to a MemRegion that does not have stack storage.
// (3) We are binding to a top-level parameter region with a non-trivial
// destructor. We won't see the destructor during analysis, but it's there.
// (4) We are binding to a MemRegion with stack storage that the store
// does not understand.
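// For example (illustrative user code), in
//   int *g; void set(int *p) { g = p; }
// the store binds 'p' into a global region, so the pointed-to value escapes
// via case (2).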
ProgramStateRef ExprEngine::processPointerEscapedOnBind(
ProgramStateRef State, ArrayRef<std::pair<SVal, SVal>> LocAndVals,
const LocationContext *LCtx, PointerEscapeKind Kind,
const CallEvent *Call) {
SmallVector<SVal, 8> Escaped;
for (const std::pair<SVal, SVal> &LocAndVal : LocAndVals) {
// Cases (1) and (2).
const MemRegion *MR = LocAndVal.first.getAsRegion();
if (!MR ||
!isa<StackSpaceRegion, StaticGlobalSpaceRegion>(MR->getMemorySpace())) {
Escaped.push_back(LocAndVal.second);
continue;
}
// Case (3).
if (const auto *VR = dyn_cast<VarRegion>(MR->getBaseRegion()))
if (VR->hasStackParametersStorage() && VR->getStackFrame()->inTopFrame())
if (const auto *RD = VR->getValueType()->getAsCXXRecordDecl())
if (!RD->hasTrivialDestructor()) {
Escaped.push_back(LocAndVal.second);
continue;
}
// Case (4): in order to test that, generate a new state with the binding
// added. If it is the same state, then it escapes (since the store cannot
// represent the binding).
// Perform this check only when the new value differs from the currently
// stored one, since rebinding the same value trivially yields the same
// state.
SVal StoredVal = State->getSVal(MR);
if (StoredVal != LocAndVal.second)
if (State ==
(State->bindLoc(loc::MemRegionVal(MR), LocAndVal.second, LCtx)))
Escaped.push_back(LocAndVal.second);
}
if (Escaped.empty())
return State;
return escapeValues(State, Escaped, Kind, Call);
}
ProgramStateRef
ExprEngine::processPointerEscapedOnBind(ProgramStateRef State, SVal Loc,
SVal Val, const LocationContext *LCtx) {
std::pair<SVal, SVal> LocAndVal(Loc, Val);
return processPointerEscapedOnBind(State, LocAndVal, LCtx, PSK_EscapeOnBind,
nullptr);
}
ProgramStateRef
ExprEngine::notifyCheckersOfPointerEscape(ProgramStateRef State,
const InvalidatedSymbols *Invalidated,
ArrayRef<const MemRegion *> ExplicitRegions,
const CallEvent *Call,
RegionAndSymbolInvalidationTraits &ITraits) {
if (!Invalidated || Invalidated->empty())
return State;
if (!Call)
return getCheckerManager().runCheckersForPointerEscape(State,
*Invalidated,
nullptr,
PSK_EscapeOther,
&ITraits);
// If the symbols were invalidated by a call, we want to find out which ones
// were invalidated directly due to being arguments to the call.
InvalidatedSymbols SymbolsDirectlyInvalidated;
for (const auto I : ExplicitRegions) {
if (const SymbolicRegion *R = I->StripCasts()->getAs<SymbolicRegion>())
SymbolsDirectlyInvalidated.insert(R->getSymbol());
}
InvalidatedSymbols SymbolsIndirectlyInvalidated;
for (const auto &sym : *Invalidated) {
if (SymbolsDirectlyInvalidated.count(sym))
continue;
SymbolsIndirectlyInvalidated.insert(sym);
}
if (!SymbolsDirectlyInvalidated.empty())
State = getCheckerManager().runCheckersForPointerEscape(State,
SymbolsDirectlyInvalidated, Call, PSK_DirectEscapeOnCall, &ITraits);
// Notify about the symbols that get indirectly invalidated by the call.
if (!SymbolsIndirectlyInvalidated.empty())
State = getCheckerManager().runCheckersForPointerEscape(State,
SymbolsIndirectlyInvalidated, Call, PSK_IndirectEscapeOnCall, &ITraits);
return State;
}
/// evalBind - Handle the semantics of binding a value to a specific location.
/// This method is used by evalStore, VisitDeclStmt, and others.
void ExprEngine::evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE,
ExplodedNode *Pred,
SVal location, SVal Val,
bool atDeclInit, const ProgramPoint *PP) {
const LocationContext *LC = Pred->getLocationContext();
PostStmt PS(StoreE, LC);
if (!PP)
PP = &PS;
// Do a previsit of the bind.
ExplodedNodeSet CheckedSet;
getCheckerManager().runCheckersForBind(CheckedSet, Pred, location, Val,
StoreE, *this, *PP);
StmtNodeBuilder Bldr(CheckedSet, Dst, *currBldrCtx);
// If the location is not a 'Loc', it will already be handled by
// the checkers. There is nothing left to do.
if (!isa<Loc>(location)) {
const ProgramPoint L = PostStore(StoreE, LC, /*Loc*/nullptr,
/*tag*/nullptr);
ProgramStateRef state = Pred->getState();
state = processPointerEscapedOnBind(state, location, Val, LC);
Bldr.generateNode(L, state, Pred);
return;
}
for (const auto PredI : CheckedSet) {
ProgramStateRef state = PredI->getState();
state = processPointerEscapedOnBind(state, location, Val, LC);
// When binding the value, pass on the hint that this is an initialization.
// For initializations, we do not need to inform clients of region
// changes.
state = state->bindLoc(location.castAs<Loc>(),
Val, LC, /* notifyChanges = */ !atDeclInit);
const MemRegion *LocReg = nullptr;
if (std::optional<loc::MemRegionVal> LocRegVal =
location.getAs<loc::MemRegionVal>()) {
LocReg = LocRegVal->getRegion();
}
const ProgramPoint L = PostStore(StoreE, LC, LocReg, nullptr);
Bldr.generateNode(L, state, PredI);
}
}
/// evalStore - Handle the semantics of a store via an assignment.
/// @param Dst The node set to store generated state nodes
/// @param AssignE The assignment expression if the store happens in an
/// assignment.
/// @param LocationE The location expression that is stored to.
/// @param state The current simulation state
/// @param location The location to store the value
/// @param Val The value to be stored
void ExprEngine::evalStore(ExplodedNodeSet &Dst, const Expr *AssignE,
const Expr *LocationE,
ExplodedNode *Pred,
ProgramStateRef state, SVal location, SVal Val,
const ProgramPointTag *tag) {
// Proceed with the store. We use AssignE as the anchor for the PostStore
// ProgramPoint if it is non-NULL, and LocationE otherwise.
const Expr *StoreE = AssignE ? AssignE : LocationE;
// Evaluate the location (checks for bad dereferences).
ExplodedNodeSet Tmp;
evalLocation(Tmp, AssignE, LocationE, Pred, state, location, false);
if (Tmp.empty())
return;
if (location.isUndef())
return;
for (const auto I : Tmp)
evalBind(Dst, StoreE, I, location, Val, false);
}
void ExprEngine::evalLoad(ExplodedNodeSet &Dst,
const Expr *NodeEx,
const Expr *BoundEx,
ExplodedNode *Pred,
ProgramStateRef state,
SVal location,
const ProgramPointTag *tag,
QualType LoadTy) {
assert(!isa<NonLoc>(location) && "location cannot be a NonLoc.");
assert(NodeEx);
assert(BoundEx);
// Evaluate the location (checks for bad dereferences).
ExplodedNodeSet Tmp;
evalLocation(Tmp, NodeEx, BoundEx, Pred, state, location, true);
if (Tmp.empty())
return;
StmtNodeBuilder Bldr(Tmp, Dst, *currBldrCtx);
if (location.isUndef())
return;
// Proceed with the load.
for (const auto I : Tmp) {
state = I->getState();
const LocationContext *LCtx = I->getLocationContext();
SVal V = UnknownVal();
if (location.isValid()) {
if (LoadTy.isNull())
LoadTy = BoundEx->getType();
V = state->getSVal(location.castAs<Loc>(), LoadTy);
}
Bldr.generateNode(NodeEx, I, state->BindExpr(BoundEx, LCtx, V), tag,
ProgramPoint::PostLoadKind);
}
}
void ExprEngine::evalLocation(ExplodedNodeSet &Dst,
const Stmt *NodeEx,
const Stmt *BoundEx,
ExplodedNode *Pred,
ProgramStateRef state,
SVal location,
bool isLoad) {
StmtNodeBuilder BldrTop(Pred, Dst, *currBldrCtx);
// Early checks for performance reasons.
if (location.isUnknown()) {
return;
}
ExplodedNodeSet Src;
BldrTop.takeNodes(Pred);
StmtNodeBuilder Bldr(Pred, Src, *currBldrCtx);
if (Pred->getState() != state) {
// Associate this new state with an ExplodedNode.
// FIXME: If I pass null tag, the graph is incorrect, e.g for
// int *p;
// p = 0;
// *p = 0xDEADBEEF;
// "p = 0" is not noted as "Null pointer value stored to 'p'" but
// instead "int *p" is noted as
// "Variable 'p' initialized to a null pointer value"
static SimpleProgramPointTag tag(TagProviderName, "Location");
Bldr.generateNode(NodeEx, Pred, state, &tag);
}
ExplodedNodeSet Tmp;
getCheckerManager().runCheckersForLocation(Tmp, Src, location, isLoad,
NodeEx, BoundEx, *this);
BldrTop.addNodes(Tmp);
}
std::pair<const ProgramPointTag *, const ProgramPointTag*>
ExprEngine::geteagerlyAssumeBinOpBifurcationTags() {
static SimpleProgramPointTag
eagerlyAssumeBinOpBifurcationTrue(TagProviderName,
"Eagerly Assume True"),
eagerlyAssumeBinOpBifurcationFalse(TagProviderName,
"Eagerly Assume False");
return std::make_pair(&eagerlyAssumeBinOpBifurcationTrue,
&eagerlyAssumeBinOpBifurcationFalse);
}
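// For example, once `x == 0` has been evaluated to a symbolic comparison
// value, the function below eagerly bifurcates the state: one successor
// assumes the comparison holds (bound to 1), the other that it does not
// (bound to 0).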
void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst,
ExplodedNodeSet &Src,
const Expr *Ex) {
StmtNodeBuilder Bldr(Src, Dst, *currBldrCtx);
for (const auto Pred : Src) {
// Test if the previous node was at the same expression. This can happen
// when the expression fails to evaluate to anything meaningful and
// (as an optimization) we don't generate a node.
ProgramPoint P = Pred->getLocation();
if (!P.getAs<PostStmt>() || P.castAs<PostStmt>().getStmt() != Ex) {
continue;
}
ProgramStateRef state = Pred->getState();
SVal V = state->getSVal(Ex, Pred->getLocationContext());
std::optional<nonloc::SymbolVal> SEV = V.getAs<nonloc::SymbolVal>();
if (SEV && SEV->isExpression()) {
const std::pair<const ProgramPointTag *, const ProgramPointTag*> &tags =
geteagerlyAssumeBinOpBifurcationTags();
ProgramStateRef StateTrue, StateFalse;
std::tie(StateTrue, StateFalse) = state->assume(*SEV);
// First assume that the condition is true.
if (StateTrue) {
SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());
StateTrue = StateTrue->BindExpr(Ex, Pred->getLocationContext(), Val);
Bldr.generateNode(Ex, Pred, StateTrue, tags.first);
}
// Next, assume that the condition is false.
if (StateFalse) {
SVal Val = svalBuilder.makeIntVal(0U, Ex->getType());
StateFalse = StateFalse->BindExpr(Ex, Pred->getLocationContext(), Val);
Bldr.generateNode(Ex, Pred, StateFalse, tags.second);
}
}
}
}
void ExprEngine::VisitGCCAsmStmt(const GCCAsmStmt *A, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
// We have processed both the inputs and the outputs. All of the outputs
// should evaluate to Locs. Nuke all of their values.
// FIXME: Some day in the future it would be nice to allow a "plug-in"
// which interprets the inline asm and stores proper results in the
// outputs.
ProgramStateRef state = Pred->getState();
for (const Expr *O : A->outputs()) {
SVal X = state->getSVal(O, Pred->getLocationContext());
assert(!isa<NonLoc>(X)); // Should be an Lval, or unknown, undef.
if (std::optional<Loc> LV = X.getAs<Loc>())
state = state->bindLoc(*LV, UnknownVal(), Pred->getLocationContext());
}
Bldr.generateNode(A, Pred, state);
}
void ExprEngine::VisitMSAsmStmt(const MSAsmStmt *A, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
Bldr.generateNode(A, Pred, Pred->getState());
}
//===----------------------------------------------------------------------===//
// Visualization.
//===----------------------------------------------------------------------===//
namespace llvm {
template<>
struct DOTGraphTraits<ExplodedGraph*> : public DefaultDOTGraphTraits {
DOTGraphTraits (bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static bool nodeHasBugReport(const ExplodedNode *N) {
BugReporter &BR = static_cast<ExprEngine &>(
N->getState()->getStateManager().getOwningEngine()).getBugReporter();
for (const auto &Class : BR.equivalenceClasses()) {
for (const auto &Report : Class.getReports()) {
const auto *PR = dyn_cast<PathSensitiveBugReport>(Report.get());
if (!PR)
continue;
const ExplodedNode *EN = PR->getErrorNode();
if (EN->getState() == N->getState() &&
EN->getLocation() == N->getLocation())
return true;
}
}
return false;
}
/// \p PreCallback: callback before break.
/// \p PostCallback: callback after break.
/// \p Stop: stop iteration if returns @c true
/// \return Whether @c Stop ever returned @c true.
static bool traverseHiddenNodes(
const ExplodedNode *N,
llvm::function_ref<void(const ExplodedNode *)> PreCallback,
llvm::function_ref<void(const ExplodedNode *)> PostCallback,
llvm::function_ref<bool(const ExplodedNode *)> Stop) {
while (true) {
PreCallback(N);
if (Stop(N))
return true;
if (N->succ_size() != 1 || !isNodeHidden(N->getFirstSucc(), nullptr))
break;
PostCallback(N);
N = N->getFirstSucc();
}
return false;
}
static bool isNodeHidden(const ExplodedNode *N, const ExplodedGraph *G) {
return N->isTrivial();
}
static std::string getNodeLabel(const ExplodedNode *N, ExplodedGraph *G){
std::string Buf;
llvm::raw_string_ostream Out(Buf);
const bool IsDot = true;
const unsigned int Space = 1;
ProgramStateRef State = N->getState();
Out << "{ \"state_id\": " << State->getID()
<< ",\\l";
Indent(Out, Space, IsDot) << "\"program_points\": [\\l";
// Dump program point for all the previously skipped nodes.
traverseHiddenNodes(
N,
[&](const ExplodedNode *OtherNode) {
Indent(Out, Space + 1, IsDot) << "{ ";
OtherNode->getLocation().printJson(Out, /*NL=*/"\\l");
Out << ", \"tag\": ";
if (const ProgramPointTag *Tag = OtherNode->getLocation().getTag())
Out << '\"' << Tag->getTagDescription() << '\"';
else
Out << "null";
Out << ", \"node_id\": " << OtherNode->getID() <<
", \"is_sink\": " << OtherNode->isSink() <<
", \"has_report\": " << nodeHasBugReport(OtherNode) << " }";
},
// Adds a comma and a new-line between each program point.
[&](const ExplodedNode *) { Out << ",\\l"; },
[&](const ExplodedNode *) { return false; });
Out << "\\l"; // Adds a new-line to the last program point.
Indent(Out, Space, IsDot) << "],\\l";
State->printDOT(Out, N->getLocationContext(), Space);
Out << "\\l}\\l";
return Buf;
}
};
} // namespace llvm
void ExprEngine::ViewGraph(bool trim) {
std::string Filename = DumpGraph(trim);
llvm::DisplayGraph(Filename, false, llvm::GraphProgram::DOT);
}
void ExprEngine::ViewGraph(ArrayRef<const ExplodedNode *> Nodes) {
std::string Filename = DumpGraph(Nodes);
llvm::DisplayGraph(Filename, false, llvm::GraphProgram::DOT);
}
std::string ExprEngine::DumpGraph(bool trim, StringRef Filename) {
if (trim) {
std::vector<const ExplodedNode *> Src;
// Iterate through the reports and get their nodes.
for (const auto &Class : BR.equivalenceClasses()) {
const auto *R =
dyn_cast<PathSensitiveBugReport>(Class.getReports()[0].get());
if (!R)
continue;
const auto *N = const_cast<ExplodedNode *>(R->getErrorNode());
Src.push_back(N);
}
return DumpGraph(Src, Filename);
}
return llvm::WriteGraph(&G, "ExprEngine", /*ShortNames=*/false,
/*Title=*/"Exploded Graph",
/*Filename=*/std::string(Filename));
}
std::string ExprEngine::DumpGraph(ArrayRef<const ExplodedNode *> Nodes,
StringRef Filename) {
std::unique_ptr<ExplodedGraph> TrimmedG(G.trim(Nodes));
if (!TrimmedG.get()) {
llvm::errs() << "warning: Trimmed ExplodedGraph is empty.\n";
return "";
}
return llvm::WriteGraph(TrimmedG.get(), "TrimmedExprEngine",
/*ShortNames=*/false,
/*Title=*/"Trimmed Exploded Graph",
/*Filename=*/std::string(Filename));
}
void *ProgramStateTrait<ReplayWithoutInlining>::GDMIndex() {
static int index = 0;
return &index;
}
void ExprEngine::anchor() { }
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c
index 867ed97e57bf..b1c4abd9d11d 100644
--- a/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -1,1185 +1,1205 @@
//===-- cpu_model/x86.c - Support for __cpu_model builtin --------*- C -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is based on LLVM's lib/Support/Host.cpp.
// It implements the operating system Host concept and builtin
// __cpu_model for the compiler_rt library for x86.
//
//===----------------------------------------------------------------------===//
#include "cpu_model.h"
#if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
defined(_M_X64))
#error This file is intended only for x86-based targets
#endif
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
#include <assert.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
enum VendorSignatures {
SIG_INTEL = 0x756e6547, // Genu
SIG_AMD = 0x68747541, // Auth
};
enum ProcessorVendors {
VENDOR_INTEL = 1,
VENDOR_AMD,
VENDOR_OTHER,
VENDOR_MAX
};
enum ProcessorTypes {
INTEL_BONNELL = 1,
INTEL_CORE2,
INTEL_COREI7,
AMDFAM10H,
AMDFAM15H,
INTEL_SILVERMONT,
INTEL_KNL,
AMD_BTVER1,
AMD_BTVER2,
AMDFAM17H,
INTEL_KNM,
INTEL_GOLDMONT,
INTEL_GOLDMONT_PLUS,
INTEL_TREMONT,
AMDFAM19H,
ZHAOXIN_FAM7H,
INTEL_SIERRAFOREST,
INTEL_GRANDRIDGE,
INTEL_CLEARWATERFOREST,
+ AMDFAM1AH,
CPU_TYPE_MAX
};
enum ProcessorSubtypes {
INTEL_COREI7_NEHALEM = 1,
INTEL_COREI7_WESTMERE,
INTEL_COREI7_SANDYBRIDGE,
AMDFAM10H_BARCELONA,
AMDFAM10H_SHANGHAI,
AMDFAM10H_ISTANBUL,
AMDFAM15H_BDVER1,
AMDFAM15H_BDVER2,
AMDFAM15H_BDVER3,
AMDFAM15H_BDVER4,
AMDFAM17H_ZNVER1,
INTEL_COREI7_IVYBRIDGE,
INTEL_COREI7_HASWELL,
INTEL_COREI7_BROADWELL,
INTEL_COREI7_SKYLAKE,
INTEL_COREI7_SKYLAKE_AVX512,
INTEL_COREI7_CANNONLAKE,
INTEL_COREI7_ICELAKE_CLIENT,
INTEL_COREI7_ICELAKE_SERVER,
AMDFAM17H_ZNVER2,
INTEL_COREI7_CASCADELAKE,
INTEL_COREI7_TIGERLAKE,
INTEL_COREI7_COOPERLAKE,
INTEL_COREI7_SAPPHIRERAPIDS,
INTEL_COREI7_ALDERLAKE,
AMDFAM19H_ZNVER3,
INTEL_COREI7_ROCKETLAKE,
ZHAOXIN_FAM7H_LUJIAZUI,
AMDFAM19H_ZNVER4,
INTEL_COREI7_GRANITERAPIDS,
INTEL_COREI7_GRANITERAPIDS_D,
INTEL_COREI7_ARROWLAKE,
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
+ AMDFAM1AH_ZNVER5,
CPU_SUBTYPE_MAX
};
enum ProcessorFeatures {
FEATURE_CMOV = 0,
FEATURE_MMX,
FEATURE_POPCNT,
FEATURE_SSE,
FEATURE_SSE2,
FEATURE_SSE3,
FEATURE_SSSE3,
FEATURE_SSE4_1,
FEATURE_SSE4_2,
FEATURE_AVX,
FEATURE_AVX2,
FEATURE_SSE4_A,
FEATURE_FMA4,
FEATURE_XOP,
FEATURE_FMA,
FEATURE_AVX512F,
FEATURE_BMI,
FEATURE_BMI2,
FEATURE_AES,
FEATURE_PCLMUL,
FEATURE_AVX512VL,
FEATURE_AVX512BW,
FEATURE_AVX512DQ,
FEATURE_AVX512CD,
FEATURE_AVX512ER,
FEATURE_AVX512PF,
FEATURE_AVX512VBMI,
FEATURE_AVX512IFMA,
FEATURE_AVX5124VNNIW,
FEATURE_AVX5124FMAPS,
FEATURE_AVX512VPOPCNTDQ,
FEATURE_AVX512VBMI2,
FEATURE_GFNI,
FEATURE_VPCLMULQDQ,
FEATURE_AVX512VNNI,
FEATURE_AVX512BITALG,
FEATURE_AVX512BF16,
FEATURE_AVX512VP2INTERSECT,
// FIXME: The features below are missing some entries compared to GCC,
// because some GCC features do not map one-to-one onto LLVM features.
// FEATURE_3DNOW,
// FEATURE_3DNOWP,
FEATURE_ADX = 40,
// FEATURE_ABM,
FEATURE_CLDEMOTE = 42,
FEATURE_CLFLUSHOPT,
FEATURE_CLWB,
FEATURE_CLZERO,
FEATURE_CMPXCHG16B,
// FIXME: Not adding FEATURE_CMPXCHG8B is a workaround to make 'generic' a
// cpu string with no X86_FEATURE_COMPAT features, which is required by the
// current implementation of the cpu_specific/cpu_dispatch FMV feature.
// FEATURE_CMPXCHG8B,
FEATURE_ENQCMD = 48,
FEATURE_F16C,
FEATURE_FSGSBASE,
// FEATURE_FXSAVE,
// FEATURE_HLE,
// FEATURE_IBT,
FEATURE_LAHF_LM = 54,
FEATURE_LM,
FEATURE_LWP,
FEATURE_LZCNT,
FEATURE_MOVBE,
FEATURE_MOVDIR64B,
FEATURE_MOVDIRI,
FEATURE_MWAITX,
// FEATURE_OSXSAVE,
FEATURE_PCONFIG = 63,
FEATURE_PKU,
FEATURE_PREFETCHWT1,
FEATURE_PRFCHW,
FEATURE_PTWRITE,
FEATURE_RDPID,
FEATURE_RDRND,
FEATURE_RDSEED,
FEATURE_RTM,
FEATURE_SERIALIZE,
FEATURE_SGX,
FEATURE_SHA,
FEATURE_SHSTK,
FEATURE_TBM,
FEATURE_TSXLDTRK,
FEATURE_VAES,
FEATURE_WAITPKG,
FEATURE_WBNOINVD,
FEATURE_XSAVE,
FEATURE_XSAVEC,
FEATURE_XSAVEOPT,
FEATURE_XSAVES,
FEATURE_AMX_TILE,
FEATURE_AMX_INT8,
FEATURE_AMX_BF16,
FEATURE_UINTR,
FEATURE_HRESET,
FEATURE_KL,
// FEATURE_AESKLE,
FEATURE_WIDEKL = 92,
FEATURE_AVXVNNI,
FEATURE_AVX512FP16,
FEATURE_X86_64_BASELINE,
FEATURE_X86_64_V2,
FEATURE_X86_64_V3,
FEATURE_X86_64_V4,
FEATURE_AVXIFMA,
FEATURE_AVXVNNIINT8,
FEATURE_AVXNECONVERT,
FEATURE_CMPCCXADD,
FEATURE_AMX_FP16,
FEATURE_PREFETCHI,
FEATURE_RAOINT,
FEATURE_AMX_COMPLEX,
FEATURE_AVXVNNIINT16,
FEATURE_SM3,
FEATURE_SHA512,
FEATURE_SM4,
FEATURE_APXF,
FEATURE_USERMSR,
FEATURE_AVX10_1_256,
FEATURE_AVX10_1_512,
CPU_FEATURE_MAX
};
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
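// (EFLAGS bit 21 is the ID flag: CPUID is available exactly when software
// can toggle it, which is what the 0x00200000 mask below tests.)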
static bool isCpuIdSupported(void) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
int __cpuid_supported;
__asm__(" pushfl\n"
" popl %%eax\n"
" movl %%eax,%%ecx\n"
" xorl $0x00200000,%%eax\n"
" pushl %%eax\n"
" popfl\n"
" pushfl\n"
" popl %%eax\n"
" movl $0,%0\n"
" cmpl %%eax,%%ecx\n"
" je 1f\n"
" movl $1,%0\n"
"1:"
: "=r"(__cpuid_supported)
:
: "eax", "ecx");
if (!__cpuid_supported)
return false;
#endif
return true;
#endif
return true;
}
// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
// FIXME: should we save this for Clang?
__asm__("movq\t%%rbx, %%rsi\n\t"
"cpuid\n\t"
"xchgq\t%%rbx, %%rsi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value));
return false;
#elif defined(__i386__)
__asm__("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value));
return false;
#else
return true;
#endif
#elif defined(_MSC_VER)
// The MSVC intrinsic is portable across x86 and x64.
int registers[4];
__cpuid(registers, value);
*rEAX = registers[0];
*rEBX = registers[1];
*rECX = registers[2];
*rEDX = registers[3];
return false;
#else
return true;
#endif
}
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
// FIXME: should we save this for Clang?
__asm__("movq\t%%rbx, %%rsi\n\t"
"cpuid\n\t"
"xchgq\t%%rbx, %%rsi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
return false;
#elif defined(__i386__)
__asm__("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
return false;
#else
return true;
#endif
#elif defined(_MSC_VER)
int registers[4];
__cpuidex(registers, value, subleaf);
*rEAX = registers[0];
*rEBX = registers[1];
*rECX = registers[2];
*rEDX = registers[3];
return false;
#else
return true;
#endif
}
// Read control register 0 (XCR0). Used to detect features such as AVX.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
// Check xgetbv; this uses a .byte sequence instead of the instruction
// directly because older assemblers do not include support for xgetbv and
// there is no easy way to conditionally compile based on the assembler used.
__asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
*rEAX = Result;
*rEDX = Result >> 32;
return false;
#else
return true;
#endif
}
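// For example, a caller deciding whether AVX is usable combines the CPUID
// feature bit with a check that the OS saves XMM and YMM state via XCR0:
//   unsigned EAX, EDX;
//   bool HasAVXSave = !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);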
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
unsigned *Model) {
*Family = (EAX >> 8) & 0xf; // Bits 8 - 11
*Model = (EAX >> 4) & 0xf; // Bits 4 - 7
if (*Family == 6 || *Family == 0xf) {
if (*Family == 0xf)
// Examine extended family ID if family ID is F.
*Family += (EAX >> 20) & 0xff; // Bits 20 - 27
// Examine extended model ID if family ID is 6 or F.
*Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
}
}
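// Worked example: for a leaf-1 EAX value of 0x000B0671 (hypothetical here),
// Family = (EAX >> 8) & 0xf = 6 and Model = (EAX >> 4) & 0xf = 7; since
// Family == 6, Model += ((EAX >> 16) & 0xf) << 4 = 0xb0, giving Model 0xb7.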
#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
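// For example, testFeature(FEATURE_ADX) (enum value 40) tests bit
// 40 % 32 == 8 of Features[40 / 32 == 1].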
static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
unsigned Model,
const unsigned *Features,
unsigned *Type,
unsigned *Subtype) {
// We select CPU strings to match the code in Host.cpp, but we don't use them
// in compiler-rt.
const char *CPU = 0;
switch (Family) {
case 6:
switch (Model) {
case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
// processor, Intel Core 2 Quad processor, Intel Core 2 Quad
// mobile processor, Intel Core 2 Extreme processor, Intel
// Pentium Dual-Core processor, Intel Xeon processor, model
// 0Fh. All processors are manufactured using the 65 nm process.
case 0x16: // Intel Celeron processor model 16h. All processors are
// manufactured using the 65 nm process
CPU = "core2";
*Type = INTEL_CORE2;
break;
case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
// 17h. All processors are manufactured using the 45 nm process.
//
// 45nm: Penryn, Wolfdale, Yorkfield (XE)
case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
// the 45 nm process.
CPU = "penryn";
*Type = INTEL_CORE2;
break;
case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 45 nm process.
case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
// As found in a Summer 2010 model iMac.
case 0x1f:
case 0x2e: // Nehalem EX
CPU = "nehalem";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_NEHALEM;
break;
case 0x25: // Intel Core i7, laptop version.
case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 32 nm process.
case 0x2f: // Westmere EX
CPU = "westmere";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_WESTMERE;
break;
case 0x2a: // Intel Core i7 processor. All processors are manufactured
// using the 32 nm process.
case 0x2d:
CPU = "sandybridge";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_SANDYBRIDGE;
break;
case 0x3a:
case 0x3e: // Ivy Bridge EP
CPU = "ivybridge";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_IVYBRIDGE;
break;
// Haswell:
case 0x3c:
case 0x3f:
case 0x45:
case 0x46:
CPU = "haswell";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_HASWELL;
break;
// Broadwell:
case 0x3d:
case 0x47:
case 0x4f:
case 0x56:
CPU = "broadwell";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_BROADWELL;
break;
// Skylake:
case 0x4e: // Skylake mobile
case 0x5e: // Skylake desktop
case 0x8e: // Kaby Lake mobile
case 0x9e: // Kaby Lake desktop
case 0xa5: // Comet Lake-H/S
case 0xa6: // Comet Lake-U
CPU = "skylake";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_SKYLAKE;
break;
// Rocketlake:
case 0xa7:
CPU = "rocketlake";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_ROCKETLAKE;
break;
// Skylake Xeon:
case 0x55:
*Type = INTEL_COREI7;
if (testFeature(FEATURE_AVX512BF16)) {
CPU = "cooperlake";
*Subtype = INTEL_COREI7_COOPERLAKE;
} else if (testFeature(FEATURE_AVX512VNNI)) {
CPU = "cascadelake";
*Subtype = INTEL_COREI7_CASCADELAKE;
} else {
CPU = "skylake-avx512";
*Subtype = INTEL_COREI7_SKYLAKE_AVX512;
}
break;
// Cannonlake:
case 0x66:
CPU = "cannonlake";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_CANNONLAKE;
break;
// Icelake:
case 0x7d:
case 0x7e:
CPU = "icelake-client";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_ICELAKE_CLIENT;
break;
// Tigerlake:
case 0x8c:
case 0x8d:
CPU = "tigerlake";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_TIGERLAKE;
break;
// Alderlake:
case 0x97:
case 0x9a:
// Raptorlake:
case 0xb7:
case 0xba:
case 0xbf:
// Meteorlake:
case 0xaa:
case 0xac:
// Gracemont:
case 0xbe:
CPU = "alderlake";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_ALDERLAKE;
break;
// Arrowlake:
case 0xc5:
CPU = "arrowlake";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_ARROWLAKE;
break;
// Arrowlake S:
case 0xc6:
// Lunarlake:
case 0xbd:
CPU = "arrowlake-s";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_ARROWLAKE_S;
break;
// Pantherlake:
case 0xcc:
CPU = "pantherlake";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_PANTHERLAKE;
break;
// Icelake Xeon:
case 0x6a:
case 0x6c:
CPU = "icelake-server";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_ICELAKE_SERVER;
break;
// Emerald Rapids:
case 0xcf:
// Sapphire Rapids:
case 0x8f:
CPU = "sapphirerapids";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
break;
// Granite Rapids:
case 0xad:
CPU = "graniterapids";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_GRANITERAPIDS;
break;
// Granite Rapids D:
case 0xae:
CPU = "graniterapids-d";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_GRANITERAPIDS_D;
break;
case 0x1c: // Most 45 nm Intel Atom processors
case 0x26: // 45 nm Atom Lincroft
case 0x27: // 32 nm Atom Medfield
case 0x35: // 32 nm Atom Midview
case 0x36: // 32 nm Atom Midview
CPU = "bonnell";
*Type = INTEL_BONNELL;
break;
// Atom Silvermont codes from the Intel software optimization guide.
case 0x37:
case 0x4a:
case 0x4d:
case 0x5a:
case 0x5d:
case 0x4c: // really airmont
CPU = "silvermont";
*Type = INTEL_SILVERMONT;
break;
// Goldmont:
case 0x5c: // Apollo Lake
case 0x5f: // Denverton
CPU = "goldmont";
*Type = INTEL_GOLDMONT;
break; // "goldmont"
case 0x7a:
CPU = "goldmont-plus";
*Type = INTEL_GOLDMONT_PLUS;
break;
case 0x86:
case 0x8a: // Lakefield
case 0x96: // Elkhart Lake
case 0x9c: // Jasper Lake
CPU = "tremont";
*Type = INTEL_TREMONT;
break;
// Sierraforest:
case 0xaf:
CPU = "sierraforest";
*Type = INTEL_SIERRAFOREST;
break;
// Grandridge:
case 0xb6:
CPU = "grandridge";
*Type = INTEL_GRANDRIDGE;
break;
// Clearwaterforest:
case 0xdd:
CPU = "clearwaterforest";
*Type = INTEL_COREI7;
*Subtype = INTEL_CLEARWATERFOREST;
break;
case 0x57:
CPU = "knl";
*Type = INTEL_KNL;
break;
case 0x85:
CPU = "knm";
*Type = INTEL_KNM;
break;
default: // Unknown family 6 CPU.
break;
}
break;
default:
break; // Unknown.
}
return CPU;
}
static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
unsigned Model,
const unsigned *Features,
unsigned *Type,
unsigned *Subtype) {
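// Mirrors getIntelProcessorTypeAndSubtype above: returns the closest
// -march= name for the detected AMD CPU (or a null pointer for unknown
// families) and fills *Type/*Subtype for use by __builtin_cpu_is().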
const char *CPU = 0;
switch (Family) {
case 4:
CPU = "i486";
break;
case 5:
CPU = "pentium";
switch (Model) {
case 6:
case 7:
CPU = "k6";
break;
case 8:
CPU = "k6-2";
break;
case 9:
case 13:
CPU = "k6-3";
break;
case 10:
CPU = "geode";
break;
}
break;
case 6:
if (testFeature(FEATURE_SSE)) {
CPU = "athlon-xp";
break;
}
CPU = "athlon";
break;
case 15:
if (testFeature(FEATURE_SSE3)) {
CPU = "k8-sse3";
break;
}
CPU = "k8";
break;
case 16:
CPU = "amdfam10";
*Type = AMDFAM10H; // "amdfam10"
switch (Model) {
case 2:
*Subtype = AMDFAM10H_BARCELONA;
break;
case 4:
*Subtype = AMDFAM10H_SHANGHAI;
break;
case 8:
*Subtype = AMDFAM10H_ISTANBUL;
break;
}
break;
case 20:
CPU = "btver1";
*Type = AMD_BTVER1;
break;
case 21:
CPU = "bdver1";
*Type = AMDFAM15H;
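// Default to bdver1; the model-range checks below refine the subtype,
// newest ranges first, so each match can break out early.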
if (Model >= 0x60 && Model <= 0x7f) {
CPU = "bdver4";
*Subtype = AMDFAM15H_BDVER4;
break; // 60h-7Fh: Excavator
}
if (Model >= 0x30 && Model <= 0x3f) {
CPU = "bdver3";
*Subtype = AMDFAM15H_BDVER3;
break; // 30h-3Fh: Steamroller
}
if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
CPU = "bdver2";
*Subtype = AMDFAM15H_BDVER2;
break; // 02h, 10h-1Fh: Piledriver
}
if (Model <= 0x0f) {
*Subtype = AMDFAM15H_BDVER1;
break; // 00h-0Fh: Bulldozer
}
break;
case 22:
CPU = "btver2";
*Type = AMD_BTVER2;
break;
case 23:
CPU = "znver1";
*Type = AMDFAM17H;
if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
(Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
(Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
(Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
(Model >= 0xa0 && Model <= 0xaf)) {
// Family 17h Models 30h-3Fh (Starship) Zen 2
// Family 17h Models 47h (Cardinal) Zen 2
// Family 17h Models 60h-67h (Renoir) Zen 2
// Family 17h Models 68h-6Fh (Lucienne) Zen 2
// Family 17h Models 70h-7Fh (Matisse) Zen 2
// Family 17h Models 84h-87h (ProjectX) Zen 2
// Family 17h Models 90h-97h (VanGogh) Zen 2
// Family 17h Models 98h-9Fh (Mero) Zen 2
// Family 17h Models A0h-AFh (Mendocino) Zen 2
CPU = "znver2";
*Subtype = AMDFAM17H_ZNVER2;
break;
}
if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
// Family 17h Models 10h-1Fh (Raven1) Zen
// Family 17h Models 10h-1Fh (Picasso) Zen+
// Family 17h Models 20h-2Fh (Raven2 x86) Zen
*Subtype = AMDFAM17H_ZNVER1;
break;
}
break;
case 25:
CPU = "znver3";
*Type = AMDFAM19H;
if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
(Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
(Model >= 0x50 && Model <= 0x5f)) {
// Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
// Family 19h Models 20h-2Fh (Vermeer) Zen 3
// Family 19h Models 30h-3Fh (Badami) Zen 3
// Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
// Family 19h Models 50h-5Fh (Cezanne) Zen 3
*Subtype = AMDFAM19H_ZNVER3;
break;
}
if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
(Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
(Model >= 0xa0 && Model <= 0xaf)) {
// Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
// Family 19h Models 60h-6Fh (Raphael) Zen 4
// Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
// Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
// Family 19h Models A0h-AFh (Stones-Dense) Zen 4
CPU = "znver4";
*Subtype = AMDFAM19H_ZNVER4;
break; // "znver4"
}
break; // family 19h
+ case 26:
+ CPU = "znver5";
+ *Type = AMDFAM1AH;
+ if (Model <= 0x77) {
+ // Models 00h-0Fh (Breithorn).
+ // Models 10h-1Fh (Breithorn-Dense).
+ // Models 20h-2Fh (Strix 1).
+ // Models 30h-37h (Strix 2).
+ // Models 38h-3Fh (Strix 3).
+ // Models 40h-4Fh (Granite Ridge).
+ // Models 50h-5Fh (Weisshorn).
+ // Models 60h-6Fh (Krackan1).
+ // Models 70h-77h (Sarlak).
+ CPU = "znver5";
+ *Subtype = AMDFAM1AH_ZNVER5;
+ break; // "znver5"
+ }
+ break;
default:
break; // Unknown AMD CPU.
}
return CPU;
}
#undef testFeature
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
unsigned *Features) {
unsigned EAX = 0, EBX = 0;
#define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
#define setFeature(F) Features[F / 32] |= 1U << (F % 32)
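// Features is a packed bit array indexed by the FEATURE_* enumerators:
// word F / 32, bit F % 32.  For example, if FEATURE_AVX2 were 35, then
// setFeature(FEATURE_AVX2) would expand to Features[1] |= 1U << 3.
// (The value 35 is only illustrative, not the real enumerator.)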
if ((EDX >> 15) & 1)
setFeature(FEATURE_CMOV);
if ((EDX >> 23) & 1)
setFeature(FEATURE_MMX);
if ((EDX >> 25) & 1)
setFeature(FEATURE_SSE);
if ((EDX >> 26) & 1)
setFeature(FEATURE_SSE2);
if ((ECX >> 0) & 1)
setFeature(FEATURE_SSE3);
if ((ECX >> 1) & 1)
setFeature(FEATURE_PCLMUL);
if ((ECX >> 9) & 1)
setFeature(FEATURE_SSSE3);
if ((ECX >> 12) & 1)
setFeature(FEATURE_FMA);
if ((ECX >> 13) & 1)
setFeature(FEATURE_CMPXCHG16B);
if ((ECX >> 19) & 1)
setFeature(FEATURE_SSE4_1);
if ((ECX >> 20) & 1)
setFeature(FEATURE_SSE4_2);
if ((ECX >> 22) & 1)
setFeature(FEATURE_MOVBE);
if ((ECX >> 23) & 1)
setFeature(FEATURE_POPCNT);
if ((ECX >> 25) & 1)
setFeature(FEATURE_AES);
if ((ECX >> 29) & 1)
setFeature(FEATURE_F16C);
if ((ECX >> 30) & 1)
setFeature(FEATURE_RDRND);
// If CPUID indicates support for XSAVE, XRSTOR and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
// switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
bool HasAVXSave = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
((EAX & 0x6) == 0x6);
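// 0x6 covers XCR0 bits 1 (SSE/XMM state) and 2 (AVX/YMM state): both must
// be OS-enabled before the AVX register file is saved across context
// switches.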
#if defined(__APPLE__)
// Darwin lazily saves the AVX512 context on first use: trust that the OS
// will save the AVX512 context if we use AVX512 instructions, even if the
// bit is not set right now.
bool HasAVX512Save = true;
#else
// AVX512 requires additional context to be saved by the OS.
bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
#endif
// AMX requires additional context to be saved by the OS.
const unsigned AMXBits = (1 << 17) | (1 << 18);
bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
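// XCR0 bits 17 (TILECFG) and 18 (TILEDATA) checked above must both be
// OS-enabled for AMX tile state to be saved and restored.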
if (HasAVXSave)
setFeature(FEATURE_AVX);
if (((ECX >> 26) & 1) && HasAVXSave)
setFeature(FEATURE_XSAVE);
bool HasLeaf7 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7 && ((EBX >> 0) & 1))
setFeature(FEATURE_FSGSBASE);
if (HasLeaf7 && ((EBX >> 2) & 1))
setFeature(FEATURE_SGX);
if (HasLeaf7 && ((EBX >> 3) & 1))
setFeature(FEATURE_BMI);
if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave)
setFeature(FEATURE_AVX2);
if (HasLeaf7 && ((EBX >> 8) & 1))
setFeature(FEATURE_BMI2);
if (HasLeaf7 && ((EBX >> 11) & 1))
setFeature(FEATURE_RTM);
if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512F);
if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512DQ);
if (HasLeaf7 && ((EBX >> 18) & 1))
setFeature(FEATURE_RDSEED);
if (HasLeaf7 && ((EBX >> 19) & 1))
setFeature(FEATURE_ADX);
if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512IFMA);
if (HasLeaf7 && ((EBX >> 24) & 1))
setFeature(FEATURE_CLWB);
if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512PF);
if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512ER);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512CD);
if (HasLeaf7 && ((EBX >> 29) & 1))
setFeature(FEATURE_SHA);
if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512BW);
if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VL);
if (HasLeaf7 && ((ECX >> 0) & 1))
setFeature(FEATURE_PREFETCHWT1);
if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VBMI);
if (HasLeaf7 && ((ECX >> 4) & 1))
setFeature(FEATURE_PKU);
if (HasLeaf7 && ((ECX >> 5) & 1))
setFeature(FEATURE_WAITPKG);
if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VBMI2);
if (HasLeaf7 && ((ECX >> 7) & 1))
setFeature(FEATURE_SHSTK);
if (HasLeaf7 && ((ECX >> 8) & 1))
setFeature(FEATURE_GFNI);
if (HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave)
setFeature(FEATURE_VAES);
if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave)
setFeature(FEATURE_VPCLMULQDQ);
if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VNNI);
if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512BITALG);
if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VPOPCNTDQ);
if (HasLeaf7 && ((ECX >> 22) & 1))
setFeature(FEATURE_RDPID);
if (HasLeaf7 && ((ECX >> 23) & 1))
setFeature(FEATURE_KL);
if (HasLeaf7 && ((ECX >> 25) & 1))
setFeature(FEATURE_CLDEMOTE);
if (HasLeaf7 && ((ECX >> 27) & 1))
setFeature(FEATURE_MOVDIRI);
if (HasLeaf7 && ((ECX >> 28) & 1))
setFeature(FEATURE_MOVDIR64B);
if (HasLeaf7 && ((ECX >> 29) & 1))
setFeature(FEATURE_ENQCMD);
if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX5124VNNIW);
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX5124FMAPS);
if (HasLeaf7 && ((EDX >> 5) & 1))
setFeature(FEATURE_UINTR);
if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VP2INTERSECT);
if (HasLeaf7 && ((EDX >> 14) & 1))
setFeature(FEATURE_SERIALIZE);
if (HasLeaf7 && ((EDX >> 16) & 1))
setFeature(FEATURE_TSXLDTRK);
if (HasLeaf7 && ((EDX >> 18) & 1))
setFeature(FEATURE_PCONFIG);
if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave)
setFeature(FEATURE_AMX_BF16);
if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512FP16);
if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave)
setFeature(FEATURE_AMX_TILE);
if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave)
setFeature(FEATURE_AMX_INT8);
// EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
// return all 0s for invalid subleaves so check the limit.
bool HasLeaf7Subleaf1 =
HasLeaf7 && EAX >= 1 &&
!getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7Subleaf1 && ((EAX >> 0) & 1))
setFeature(FEATURE_SHA512);
if (HasLeaf7Subleaf1 && ((EAX >> 1) & 1))
setFeature(FEATURE_SM3);
if (HasLeaf7Subleaf1 && ((EAX >> 2) & 1))
setFeature(FEATURE_SM4);
if (HasLeaf7Subleaf1 && ((EAX >> 3) & 1))
setFeature(FEATURE_RAOINT);
if (HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave)
setFeature(FEATURE_AVXVNNI);
if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512BF16);
if (HasLeaf7Subleaf1 && ((EAX >> 7) & 1))
setFeature(FEATURE_CMPCCXADD);
if (HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave)
setFeature(FEATURE_AMX_FP16);
if (HasLeaf7Subleaf1 && ((EAX >> 22) & 1))
setFeature(FEATURE_HRESET);
if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
setFeature(FEATURE_AVXIFMA);
if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
setFeature(FEATURE_AVXVNNIINT8);
if (HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave)
setFeature(FEATURE_AVXNECONVERT);
if (HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave)
setFeature(FEATURE_AMX_COMPLEX);
if (HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave)
setFeature(FEATURE_AVXVNNIINT16);
if (HasLeaf7Subleaf1 && ((EDX >> 14) & 1))
setFeature(FEATURE_PREFETCHI);
if (HasLeaf7Subleaf1 && ((EDX >> 15) & 1))
setFeature(FEATURE_USERMSR);
if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1))
setFeature(FEATURE_AVX10_1_256);
if (HasLeaf7Subleaf1 && ((EDX >> 21) & 1))
setFeature(FEATURE_APXF);
unsigned MaxLevel;
getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX);
bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave)
setFeature(FEATURE_XSAVEOPT);
if (HasLeafD && ((EAX >> 1) & 1) && HasAVXSave)
setFeature(FEATURE_XSAVEC);
if (HasLeafD && ((EAX >> 3) & 1) && HasAVXSave)
setFeature(FEATURE_XSAVES);
bool HasLeaf24 =
MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24 && ((EBX >> 18) & 1))
setFeature(FEATURE_AVX10_1_512);
unsigned MaxExtLevel;
getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
!getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
if (HasExtLeaf1) {
if (ECX & 1)
setFeature(FEATURE_LAHF_LM);
if ((ECX >> 5) & 1)
setFeature(FEATURE_LZCNT);
if (((ECX >> 6) & 1))
setFeature(FEATURE_SSE4_A);
if (((ECX >> 8) & 1))
setFeature(FEATURE_PRFCHW);
if (((ECX >> 11) & 1))
setFeature(FEATURE_XOP);
if (((ECX >> 15) & 1))
setFeature(FEATURE_LWP);
if (((ECX >> 16) & 1))
setFeature(FEATURE_FMA4);
if (((ECX >> 21) & 1))
setFeature(FEATURE_TBM);
if (((ECX >> 29) & 1))
setFeature(FEATURE_MWAITX);
if (((EDX >> 29) & 1))
setFeature(FEATURE_LM);
}
bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
!getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
if (HasExtLeaf8 && ((EBX >> 0) & 1))
setFeature(FEATURE_CLZERO);
if (HasExtLeaf8 && ((EBX >> 9) & 1))
setFeature(FEATURE_WBNOINVD);
bool HasLeaf14 = MaxLevel >= 0x14 &&
!getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf14 && ((EBX >> 4) & 1))
setFeature(FEATURE_PTWRITE);
bool HasLeaf19 =
MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
setFeature(FEATURE_WIDEKL);
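// Derive the psABI x86-64 microarchitecture levels from the individual
// bits: v1 is the 64-bit baseline (LM + SSE2), v2 adds the SSE4.2-era
// features, v3 the AVX2-era features, and v4 the AVX-512 subset listed
// below.  Each level implies the previous one, hence the nesting.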
if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
setFeature(FEATURE_X86_64_BASELINE);
if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
setFeature(FEATURE_X86_64_V2);
if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) &&
hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) &&
hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) &&
hasFeature(FEATURE_MOVBE)) {
setFeature(FEATURE_X86_64_V3);
if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) &&
hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL))
setFeature(FEATURE_X86_64_V4);
}
}
}
#undef hasFeature
#undef setFeature
}
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
struct __processor_model {
unsigned int __cpu_vendor;
unsigned int __cpu_type;
unsigned int __cpu_subtype;
unsigned int __cpu_features[1];
} __cpu_model = {0, 0, 0, {0}};
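// Note: the layout of __processor_model and the meaning of the vendor,
// type and subtype fields are ABI, kept in sync with GCC's libgcc cpuinfo;
// they are what __builtin_cpu_is()/__builtin_cpu_supports() read at run
// time.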
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
// A constructor function that sets __cpu_model and __cpu_features2 to the
// right values.  This needs to run only once.  This constructor is given
// the highest priority so that it runs before constructors without a
// priority set.  However, it still runs after ifunc initializers and needs
// to be called explicitly there.
int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
unsigned EAX, EBX, ECX, EDX;
unsigned MaxLeaf = 5;
unsigned Vendor;
unsigned Model, Family;
unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
static_assert(sizeof(Features) / sizeof(Features[0]) == 4, "");
static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, "");
// This function needs to run just once.
if (__cpu_model.__cpu_vendor)
return 0;
if (!isCpuIdSupported() ||
getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
__cpu_model.__cpu_vendor = VENDOR_OTHER;
return -1;
}
getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
detectX86FamilyModel(EAX, &Family, &Model);
// Find available features.
getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
__cpu_model.__cpu_features[0] = Features[0];
__cpu_features2[0] = Features[1];
__cpu_features2[1] = Features[2];
__cpu_features2[2] = Features[3];
if (Vendor == SIG_INTEL) {
// Get CPU type.
getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
&(__cpu_model.__cpu_type),
&(__cpu_model.__cpu_subtype));
__cpu_model.__cpu_vendor = VENDOR_INTEL;
} else if (Vendor == SIG_AMD) {
// Get CPU type.
getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
&(__cpu_model.__cpu_type),
&(__cpu_model.__cpu_subtype));
__cpu_model.__cpu_vendor = VENDOR_AMD;
} else
__cpu_model.__cpu_vendor = VENDOR_OTHER;
assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
return 0;
}
#endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/divtc3.c b/contrib/llvm-project/compiler-rt/lib/builtins/divtc3.c
index 099de5802daf..c393de815337 100644
--- a/contrib/llvm-project/compiler-rt/lib/builtins/divtc3.c
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/divtc3.c
@@ -1,56 +1,56 @@
//===-- divtc3.c - Implement __divtc3 -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements __divtc3 for the compiler_rt library.
//
//===----------------------------------------------------------------------===//
#define QUAD_PRECISION
#include "fp_lib.h"
-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
// Returns: the quotient of (a + ib) / (c + id)
COMPILER_RT_ABI Qcomplex __divtc3(fp_t __a, fp_t __b, fp_t __c, fp_t __d) {
int __ilogbw = 0;
fp_t __logbw = __compiler_rt_logbtf(
__compiler_rt_fmaxtf(crt_fabstf(__c), crt_fabstf(__d)));
if (crt_isfinite(__logbw)) {
__ilogbw = (int)__logbw;
__c = __compiler_rt_scalbntf(__c, -__ilogbw);
__d = __compiler_rt_scalbntf(__d, -__ilogbw);
}
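// Scaling c+id by 2^-ilogbw keeps __denom = c*c + d*d away from overflow
// and underflow; the quotient is rescaled by the same power of two after
// the division (the classic scaled complex-division algorithm used by the
// other __div?c3 helpers in compiler-rt).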
fp_t __denom = __c * __c + __d * __d;
Qcomplex z;
COMPLEXTF_REAL(z) =
__compiler_rt_scalbntf((__a * __c + __b * __d) / __denom, -__ilogbw);
COMPLEXTF_IMAGINARY(z) =
__compiler_rt_scalbntf((__b * __c - __a * __d) / __denom, -__ilogbw);
if (crt_isnan(COMPLEXTF_REAL(z)) && crt_isnan(COMPLEXTF_IMAGINARY(z))) {
if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) {
COMPLEXTF_REAL(z) = crt_copysigntf(CRT_INFINITY, __c) * __a;
COMPLEXTF_IMAGINARY(z) = crt_copysigntf(CRT_INFINITY, __c) * __b;
} else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) &&
crt_isfinite(__d)) {
__a = crt_copysigntf(crt_isinf(__a) ? (fp_t)1.0 : (fp_t)0.0, __a);
__b = crt_copysigntf(crt_isinf(__b) ? (fp_t)1.0 : (fp_t)0.0, __b);
COMPLEXTF_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
COMPLEXTF_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
} else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) &&
crt_isfinite(__b)) {
__c = crt_copysigntf(crt_isinf(__c) ? (fp_t)1.0 : (fp_t)0.0, __c);
__d = crt_copysigntf(crt_isinf(__d) ? (fp_t)1.0 : (fp_t)0.0, __d);
COMPLEXTF_REAL(z) = 0.0 * (__a * __c + __b * __d);
COMPLEXTF_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
}
}
return z;
}
#endif
diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/multc3.c b/contrib/llvm-project/compiler-rt/lib/builtins/multc3.c
index 61a3f45e4727..a89832f0e883 100644
--- a/contrib/llvm-project/compiler-rt/lib/builtins/multc3.c
+++ b/contrib/llvm-project/compiler-rt/lib/builtins/multc3.c
@@ -1,70 +1,70 @@
//===-- multc3.c - Implement __multc3 -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements __multc3 for the compiler_rt library.
//
//===----------------------------------------------------------------------===//
#define QUAD_PRECISION
#include "fp_lib.h"
#include "int_lib.h"
#include "int_math.h"
-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
// Returns: the product of a + ib and c + id
COMPILER_RT_ABI Qcomplex __multc3(fp_t a, fp_t b, fp_t c, fp_t d) {
fp_t ac = a * c;
fp_t bd = b * d;
fp_t ad = a * d;
fp_t bc = b * c;
Qcomplex z;
COMPLEXTF_REAL(z) = ac - bd;
COMPLEXTF_IMAGINARY(z) = ad + bc;
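// The straightforward products above can yield NaN where the mathematical
// result is infinite; the recovery logic below follows the C standard's
// Annex G pattern of re-deriving a correctly signed infinity from the
// classified operands.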
if (crt_isnan(COMPLEXTF_REAL(z)) && crt_isnan(COMPLEXTF_IMAGINARY(z))) {
int recalc = 0;
if (crt_isinf(a) || crt_isinf(b)) {
a = crt_copysigntf(crt_isinf(a) ? 1 : 0, a);
b = crt_copysigntf(crt_isinf(b) ? 1 : 0, b);
if (crt_isnan(c))
c = crt_copysigntf(0, c);
if (crt_isnan(d))
d = crt_copysigntf(0, d);
recalc = 1;
}
if (crt_isinf(c) || crt_isinf(d)) {
c = crt_copysigntf(crt_isinf(c) ? 1 : 0, c);
d = crt_copysigntf(crt_isinf(d) ? 1 : 0, d);
if (crt_isnan(a))
a = crt_copysigntf(0, a);
if (crt_isnan(b))
b = crt_copysigntf(0, b);
recalc = 1;
}
if (!recalc &&
(crt_isinf(ac) || crt_isinf(bd) || crt_isinf(ad) || crt_isinf(bc))) {
if (crt_isnan(a))
a = crt_copysigntf(0, a);
if (crt_isnan(b))
b = crt_copysigntf(0, b);
if (crt_isnan(c))
c = crt_copysigntf(0, c);
if (crt_isnan(d))
d = crt_copysigntf(0, d);
recalc = 1;
}
if (recalc) {
COMPLEXTF_REAL(z) = CRT_INFINITY * (a * c - b * d);
COMPLEXTF_IMAGINARY(z) = CRT_INFINITY * (a * d + b * c);
}
}
return z;
}
#endif
diff --git a/contrib/llvm-project/libcxx/include/chrono b/contrib/llvm-project/libcxx/include/chrono
index 990c415ec2e9..7bec5e5a26ef 100644
--- a/contrib/llvm-project/libcxx/include/chrono
+++ b/contrib/llvm-project/libcxx/include/chrono
@@ -1,1022 +1,1022 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_CHRONO
#define _LIBCPP_CHRONO
// clang-format off
/*
chrono synopsis
#include <compare> // C++20
namespace std
{
namespace chrono
{
template <class ToDuration, class Rep, class Period>
constexpr
ToDuration
duration_cast(const duration<Rep, Period>& fd);
template <class Rep> struct treat_as_floating_point : is_floating_point<Rep> {};
template <class Rep> inline constexpr bool treat_as_floating_point_v
= treat_as_floating_point<Rep>::value; // C++17
template <class Rep>
struct duration_values
{
public:
static constexpr Rep zero(); // noexcept in C++20
static constexpr Rep max(); // noexcept in C++20
static constexpr Rep min(); // noexcept in C++20
};
// duration
template <class Rep, class Period = ratio<1>>
class duration
{
static_assert(!__is_duration<Rep>::value, "A duration representation can not be a duration");
static_assert(__is_ratio<Period>::value, "Second template parameter of duration must be a std::ratio");
static_assert(Period::num > 0, "duration period must be positive");
public:
typedef Rep rep;
typedef typename Period::type period;
constexpr duration() = default;
template <class Rep2>
constexpr explicit duration(const Rep2& r,
typename enable_if
<
is_convertible<const Rep2&, rep>::value &&
(treat_as_floating_point<rep>::value ||
!treat_as_floating_point<rep>::value && !treat_as_floating_point<Rep2>::value)
>::type* = 0);
// conversions
template <class Rep2, class Period2>
constexpr duration(const duration<Rep2, Period2>& d,
typename enable_if
<
treat_as_floating_point<rep>::value ||
ratio_divide<Period2, period>::type::den == 1
>::type* = 0);
// observer
constexpr rep count() const;
// arithmetic
constexpr common_type<duration>::type operator+() const;
constexpr common_type<duration>::type operator-() const;
constexpr duration& operator++(); // constexpr in C++17
constexpr duration operator++(int); // constexpr in C++17
constexpr duration& operator--(); // constexpr in C++17
constexpr duration operator--(int); // constexpr in C++17
constexpr duration& operator+=(const duration& d); // constexpr in C++17
constexpr duration& operator-=(const duration& d); // constexpr in C++17
duration& operator*=(const rep& rhs); // constexpr in C++17
duration& operator/=(const rep& rhs); // constexpr in C++17
duration& operator%=(const rep& rhs); // constexpr in C++17
duration& operator%=(const duration& rhs); // constexpr in C++17
// special values
static constexpr duration zero(); // noexcept in C++20
static constexpr duration min(); // noexcept in C++20
static constexpr duration max(); // noexcept in C++20
};
typedef duration<long long, nano> nanoseconds;
typedef duration<long long, micro> microseconds;
typedef duration<long long, milli> milliseconds;
typedef duration<long long > seconds;
typedef duration< long, ratio< 60> > minutes;
typedef duration< long, ratio<3600> > hours;
template <class Clock, class Duration = typename Clock::duration>
class time_point
{
public:
typedef Clock clock;
typedef Duration duration;
typedef typename duration::rep rep;
typedef typename duration::period period;
private:
duration d_; // exposition only
public:
time_point(); // has value "epoch" // constexpr in C++14
explicit time_point(const duration& d); // same as time_point() + d // constexpr in C++14
// conversions
template <class Duration2>
time_point(const time_point<clock, Duration2>& t); // constexpr in C++14
// observer
duration time_since_epoch() const; // constexpr in C++14
// arithmetic
time_point& operator+=(const duration& d); // constexpr in C++17
time_point& operator-=(const duration& d); // constexpr in C++17
// special values
static constexpr time_point min(); // noexcept in C++20
static constexpr time_point max(); // noexcept in C++20
};
} // chrono
// common_type traits
template <class Rep1, class Period1, class Rep2, class Period2>
struct common_type<chrono::duration<Rep1, Period1>, chrono::duration<Rep2, Period2>>;
template <class Clock, class Duration1, class Duration2>
struct common_type<chrono::time_point<Clock, Duration1>, chrono::time_point<Clock, Duration2>>;
namespace chrono {
// duration arithmetic
template <class Rep1, class Period1, class Rep2, class Period2>
constexpr
typename common_type<duration<Rep1, Period1>, duration<Rep2, Period2>>::type
operator+(const duration<Rep1, Period1>& lhs, const duration<Rep2, Period2>& rhs);
template <class Rep1, class Period1, class Rep2, class Period2>
constexpr
typename common_type<duration<Rep1, Period1>, duration<Rep2, Period2>>::type
operator-(const duration<Rep1, Period1>& lhs, const duration<Rep2, Period2>& rhs);
template <class Rep1, class Period, class Rep2>
constexpr
duration<typename common_type<Rep1, Rep2>::type, Period>
operator*(const duration<Rep1, Period>& d, const Rep2& s);
template <class Rep1, class Period, class Rep2>
constexpr
duration<typename common_type<Rep1, Rep2>::type, Period>
operator*(const Rep1& s, const duration<Rep2, Period>& d);
template <class Rep1, class Period, class Rep2>
constexpr
duration<typename common_type<Rep1, Rep2>::type, Period>
operator/(const duration<Rep1, Period>& d, const Rep2& s);
template <class Rep1, class Period1, class Rep2, class Period2>
constexpr
typename common_type<Rep1, Rep2>::type
operator/(const duration<Rep1, Period1>& lhs, const duration<Rep2, Period2>& rhs);
// duration comparisons
template <class Rep1, class Period1, class Rep2, class Period2>
constexpr
bool operator==(const duration<Rep1, Period1>& lhs, const duration<Rep2, Period2>& rhs);
template <class Rep1, class Period1, class Rep2, class Period2>
constexpr
bool operator!=(const duration<Rep1, Period1>& lhs, const duration<Rep2, Period2>& rhs); // removed in C++20
template <class Rep1, class Period1, class Rep2, class Period2>
constexpr
bool operator< (const duration<Rep1, Period1>& lhs, const duration<Rep2, Period2>& rhs);
template <class Rep1, class Period1, class Rep2, class Period2>
constexpr
bool operator<=(const duration<Rep1, Period1>& lhs, const duration<Rep2, Period2>& rhs);
template <class Rep1, class Period1, class Rep2, class Period2>
constexpr
bool operator> (const duration<Rep1, Period1>& lhs, const duration<Rep2, Period2>& rhs);
template <class Rep1, class Period1, class Rep2, class Period2>
constexpr
bool operator>=(const duration<Rep1, Period1>& lhs, const duration<Rep2, Period2>& rhs);
template<class Rep1, class Period1, class Rep2, class Period2>
requires three_way_comparable<typename CT::rep>
constexpr auto operator<=>(const duration<Rep1, Period1>& lhs,
const duration<Rep2, Period2>& rhs); // since C++20
// duration_cast
template <class ToDuration, class Rep, class Period>
ToDuration duration_cast(const duration<Rep, Period>& d);
template <class ToDuration, class Rep, class Period>
constexpr ToDuration floor(const duration<Rep, Period>& d); // C++17
template <class ToDuration, class Rep, class Period>
constexpr ToDuration ceil(const duration<Rep, Period>& d); // C++17
template <class ToDuration, class Rep, class Period>
constexpr ToDuration round(const duration<Rep, Period>& d); // C++17
// duration I/O
template<class charT, class traits, class Rep, class Period> // C++20
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os,
const duration<Rep, Period>& d);
// time_point arithmetic (all constexpr in C++14)
template <class Clock, class Duration1, class Rep2, class Period2>
time_point<Clock, typename common_type<Duration1, duration<Rep2, Period2>>::type>
operator+(const time_point<Clock, Duration1>& lhs, const duration<Rep2, Period2>& rhs);
template <class Rep1, class Period1, class Clock, class Duration2>
time_point<Clock, typename common_type<duration<Rep1, Period1>, Duration2>::type>
operator+(const duration<Rep1, Period1>& lhs, const time_point<Clock, Duration2>& rhs);
template <class Clock, class Duration1, class Rep2, class Period2>
time_point<Clock, typename common_type<Duration1, duration<Rep2, Period2>>::type>
operator-(const time_point<Clock, Duration1>& lhs, const duration<Rep2, Period2>& rhs);
template <class Clock, class Duration1, class Duration2>
typename common_type<Duration1, Duration2>::type
operator-(const time_point<Clock, Duration1>& lhs, const time_point<Clock, Duration2>& rhs);
// time_point comparisons (all constexpr in C++14)
template <class Clock, class Duration1, class Duration2>
bool operator==(const time_point<Clock, Duration1>& lhs, const time_point<Clock, Duration2>& rhs);
template <class Clock, class Duration1, class Duration2>
bool operator!=(const time_point<Clock, Duration1>& lhs, const time_point<Clock, Duration2>& rhs); // removed in C++20
template <class Clock, class Duration1, class Duration2>
bool operator< (const time_point<Clock, Duration1>& lhs, const time_point<Clock, Duration2>& rhs);
template <class Clock, class Duration1, class Duration2>
bool operator<=(const time_point<Clock, Duration1>& lhs, const time_point<Clock, Duration2>& rhs);
template <class Clock, class Duration1, class Duration2>
bool operator> (const time_point<Clock, Duration1>& lhs, const time_point<Clock, Duration2>& rhs);
template <class Clock, class Duration1, class Duration2>
bool operator>=(const time_point<Clock, Duration1>& lhs, const time_point<Clock, Duration2>& rhs);
template<class Clock, class Duration1,
three_way_comparable_with<Duration1> Duration2>
constexpr auto operator<=>(const time_point<Clock, Duration1>& lhs,
const time_point<Clock, Duration2>& rhs); // since C++20
// time_point_cast (constexpr in C++14)
template <class ToDuration, class Clock, class Duration>
time_point<Clock, ToDuration> time_point_cast(const time_point<Clock, Duration>& t);
template <class ToDuration, class Clock, class Duration>
constexpr time_point<Clock, ToDuration>
floor(const time_point<Clock, Duration>& tp); // C++17
template <class ToDuration, class Clock, class Duration>
constexpr time_point<Clock, ToDuration>
ceil(const time_point<Clock, Duration>& tp); // C++17
template <class ToDuration, class Clock, class Duration>
constexpr time_point<Clock, ToDuration>
round(const time_point<Clock, Duration>& tp); // C++17
template <class Rep, class Period>
constexpr duration<Rep, Period> abs(duration<Rep, Period> d); // C++17
// Clocks
class system_clock
{
public:
typedef microseconds duration;
typedef duration::rep rep;
typedef duration::period period;
typedef chrono::time_point<system_clock> time_point;
static const bool is_steady = false; // constexpr in C++14
static time_point now() noexcept;
static time_t to_time_t (const time_point& __t) noexcept;
static time_point from_time_t(time_t __t) noexcept;
};
template <class Duration>
using sys_time = time_point<system_clock, Duration>; // C++20
using sys_seconds = sys_time<seconds>; // C++20
using sys_days = sys_time<days>; // C++20
template<class charT, class traits, class Duration> // C++20
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const sys_time<Duration>& tp);
template<class charT, class traits> // C++20
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const sys_days& dp);
class file_clock // C++20
{
public:
typedef see-below rep;
typedef nano period;
typedef chrono::duration<rep, period> duration;
typedef chrono::time_point<file_clock> time_point;
static constexpr bool is_steady = false;
static time_point now() noexcept;
template<class Duration>
static sys_time<see-below> to_sys(const file_time<Duration>&);
template<class Duration>
static file_time<see-below> from_sys(const sys_time<Duration>&);
};
template<class Duration>
using file_time = time_point<file_clock, Duration>; // C++20
template<class charT, class traits, class Duration> // C++20
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const file_time<Duration>& tp);
class steady_clock
{
public:
typedef nanoseconds duration;
typedef duration::rep rep;
typedef duration::period period;
typedef chrono::time_point<steady_clock, duration> time_point;
static const bool is_steady = true; // constexpr in C++14
static time_point now() noexcept;
};
typedef steady_clock high_resolution_clock;
// 25.7.8, local time // C++20
struct local_t {};
template<class Duration>
using local_time = time_point<local_t, Duration>;
using local_seconds = local_time<seconds>;
using local_days = local_time<days>;
template<class charT, class traits, class Duration> // C++20
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const local_time<Duration>& tp);
// 25.8.2, class last_spec // C++20
struct last_spec;
// 25.8.3, class day // C++20
class day;
constexpr bool operator==(const day& x, const day& y) noexcept;
constexpr strong_ordering operator<=>(const day& x, const day& y) noexcept;
constexpr day operator+(const day& x, const days& y) noexcept;
constexpr day operator+(const days& x, const day& y) noexcept;
constexpr day operator-(const day& x, const days& y) noexcept;
constexpr days operator-(const day& x, const day& y) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const day& d);
// 25.8.4, class month // C++20
class month;
constexpr bool operator==(const month& x, const month& y) noexcept;
constexpr strong_ordering operator<=>(const month& x, const month& y) noexcept;
constexpr month operator+(const month& x, const months& y) noexcept;
constexpr month operator+(const months& x, const month& y) noexcept;
constexpr month operator-(const month& x, const months& y) noexcept;
constexpr months operator-(const month& x, const month& y) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const month& m);
// 25.8.5, class year // C++20
class year;
constexpr bool operator==(const year& x, const year& y) noexcept;
constexpr strong_ordering operator<=>(const year& x, const year& y) noexcept;
constexpr year operator+(const year& x, const years& y) noexcept;
constexpr year operator+(const years& x, const year& y) noexcept;
constexpr year operator-(const year& x, const years& y) noexcept;
constexpr years operator-(const year& x, const year& y) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const year& y);
// 25.8.6, class weekday // C++20
class weekday;
constexpr bool operator==(const weekday& x, const weekday& y) noexcept;
constexpr weekday operator+(const weekday& x, const days& y) noexcept;
constexpr weekday operator+(const days& x, const weekday& y) noexcept;
constexpr weekday operator-(const weekday& x, const days& y) noexcept;
constexpr days operator-(const weekday& x, const weekday& y) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const weekday& wd);
// 25.8.7, class weekday_indexed // C++20
class weekday_indexed;
constexpr bool operator==(const weekday_indexed& x, const weekday_indexed& y) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const weekday_indexed& wdi);
// 25.8.8, class weekday_last // C++20
class weekday_last;
constexpr bool operator==(const weekday_last& x, const weekday_last& y) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const weekday_last& wdl);
// 25.8.9, class month_day // C++20
class month_day;
constexpr bool operator==(const month_day& x, const month_day& y) noexcept;
constexpr strong_ordering operator<=>(const month_day& x, const month_day& y) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const month_day& md);
// 25.8.10, class month_day_last // C++20
class month_day_last;
constexpr bool operator==(const month_day_last& x, const month_day_last& y) noexcept;
constexpr strong_ordering operator<=>(const month_day_last& x, const month_day_last& y) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const month_day_last& mdl);
// 25.8.11, class month_weekday // C++20
class month_weekday;
constexpr bool operator==(const month_weekday& x, const month_weekday& y) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const month_weekday& mwd);
// 25.8.12, class month_weekday_last // C++20
class month_weekday_last;
constexpr bool operator==(const month_weekday_last& x, const month_weekday_last& y) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const month_weekday_last& mwdl);
// 25.8.13, class year_month // C++20
class year_month;
constexpr bool operator==(const year_month& x, const year_month& y) noexcept;
constexpr strong_ordering operator<=>(const year_month& x, const year_month& y) noexcept;
constexpr year_month operator+(const year_month& ym, const months& dm) noexcept;
constexpr year_month operator+(const months& dm, const year_month& ym) noexcept;
constexpr year_month operator-(const year_month& ym, const months& dm) noexcept;
constexpr months operator-(const year_month& x, const year_month& y) noexcept;
constexpr year_month operator+(const year_month& ym, const years& dy) noexcept;
constexpr year_month operator+(const years& dy, const year_month& ym) noexcept;
constexpr year_month operator-(const year_month& ym, const years& dy) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const year_month& ym);
// 25.8.14, class year_month_day // C++20
class year_month_day;
constexpr bool operator==(const year_month_day& x, const year_month_day& y) noexcept;
constexpr strong_ordering operator<=>(const year_month_day& x, const year_month_day& y) noexcept;
constexpr year_month_day operator+(const year_month_day& ymd, const months& dm) noexcept;
constexpr year_month_day operator+(const months& dm, const year_month_day& ymd) noexcept;
constexpr year_month_day operator+(const year_month_day& ymd, const years& dy) noexcept;
constexpr year_month_day operator+(const years& dy, const year_month_day& ymd) noexcept;
constexpr year_month_day operator-(const year_month_day& ymd, const months& dm) noexcept;
constexpr year_month_day operator-(const year_month_day& ymd, const years& dy) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const year_month_day& ymd);
// 25.8.15, class year_month_day_last // C++20
class year_month_day_last;
constexpr bool operator==(const year_month_day_last& x, const year_month_day_last& y) noexcept;
constexpr strong_ordering operator<=>(const year_month_day_last& x, const year_month_day_last& y) noexcept;
constexpr year_month_day_last
operator+(const year_month_day_last& ymdl, const months& dm) noexcept;
constexpr year_month_day_last
operator+(const months& dm, const year_month_day_last& ymdl) noexcept;
constexpr year_month_day_last
operator+(const year_month_day_last& ymdl, const years& dy) noexcept;
constexpr year_month_day_last
operator+(const years& dy, const year_month_day_last& ymdl) noexcept;
constexpr year_month_day_last
operator-(const year_month_day_last& ymdl, const months& dm) noexcept;
constexpr year_month_day_last
operator-(const year_month_day_last& ymdl, const years& dy) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const year_month_day_last& ymdl);
// 25.8.16, class year_month_weekday // C++20
class year_month_weekday;
constexpr bool operator==(const year_month_weekday& x,
const year_month_weekday& y) noexcept;
constexpr year_month_weekday
operator+(const year_month_weekday& ymwd, const months& dm) noexcept;
constexpr year_month_weekday
operator+(const months& dm, const year_month_weekday& ymwd) noexcept;
constexpr year_month_weekday
operator+(const year_month_weekday& ymwd, const years& dy) noexcept;
constexpr year_month_weekday
operator+(const years& dy, const year_month_weekday& ymwd) noexcept;
constexpr year_month_weekday
operator-(const year_month_weekday& ymwd, const months& dm) noexcept;
constexpr year_month_weekday
operator-(const year_month_weekday& ymwd, const years& dy) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const year_month_weekday& ymwd);
// 25.8.17, class year_month_weekday_last // C++20
class year_month_weekday_last;
constexpr bool operator==(const year_month_weekday_last& x,
const year_month_weekday_last& y) noexcept;
constexpr year_month_weekday_last
operator+(const year_month_weekday_last& ymwdl, const months& dm) noexcept;
constexpr year_month_weekday_last
operator+(const months& dm, const year_month_weekday_last& ymwdl) noexcept;
constexpr year_month_weekday_last
operator+(const year_month_weekday_last& ymwdl, const years& dy) noexcept;
constexpr year_month_weekday_last
operator+(const years& dy, const year_month_weekday_last& ymwdl) noexcept;
constexpr year_month_weekday_last
operator-(const year_month_weekday_last& ymwdl, const months& dm) noexcept;
constexpr year_month_weekday_last
operator-(const year_month_weekday_last& ymwdl, const years& dy) noexcept;
template<class charT, class traits>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const year_month_weekday_last& ymwdl);
// 25.8.18, civil calendar conventional syntax operators // C++20
constexpr year_month
operator/(const year& y, const month& m) noexcept;
constexpr year_month
operator/(const year& y, int m) noexcept;
constexpr month_day
operator/(const month& m, const day& d) noexcept;
constexpr month_day
operator/(const month& m, int d) noexcept;
constexpr month_day
operator/(int m, const day& d) noexcept;
constexpr month_day
operator/(const day& d, const month& m) noexcept;
constexpr month_day
operator/(const day& d, int m) noexcept;
constexpr month_day_last
operator/(const month& m, last_spec) noexcept;
constexpr month_day_last
operator/(int m, last_spec) noexcept;
constexpr month_day_last
operator/(last_spec, const month& m) noexcept;
constexpr month_day_last
operator/(last_spec, int m) noexcept;
constexpr month_weekday
operator/(const month& m, const weekday_indexed& wdi) noexcept;
constexpr month_weekday
operator/(int m, const weekday_indexed& wdi) noexcept;
constexpr month_weekday
operator/(const weekday_indexed& wdi, const month& m) noexcept;
constexpr month_weekday
operator/(const weekday_indexed& wdi, int m) noexcept;
constexpr month_weekday_last
operator/(const month& m, const weekday_last& wdl) noexcept;
constexpr month_weekday_last
operator/(int m, const weekday_last& wdl) noexcept;
constexpr month_weekday_last
operator/(const weekday_last& wdl, const month& m) noexcept;
constexpr month_weekday_last
operator/(const weekday_last& wdl, int m) noexcept;
constexpr year_month_day
operator/(const year_month& ym, const day& d) noexcept;
constexpr year_month_day
operator/(const year_month& ym, int d) noexcept;
constexpr year_month_day
operator/(const year& y, const month_day& md) noexcept;
constexpr year_month_day
operator/(int y, const month_day& md) noexcept;
constexpr year_month_day
operator/(const month_day& md, const year& y) noexcept;
constexpr year_month_day
operator/(const month_day& md, int y) noexcept;
constexpr year_month_day_last
operator/(const year_month& ym, last_spec) noexcept;
constexpr year_month_day_last
operator/(const year& y, const month_day_last& mdl) noexcept;
constexpr year_month_day_last
operator/(int y, const month_day_last& mdl) noexcept;
constexpr year_month_day_last
operator/(const month_day_last& mdl, const year& y) noexcept;
constexpr year_month_day_last
operator/(const month_day_last& mdl, int y) noexcept;
constexpr year_month_weekday
operator/(const year_month& ym, const weekday_indexed& wdi) noexcept;
constexpr year_month_weekday
operator/(const year& y, const month_weekday& mwd) noexcept;
constexpr year_month_weekday
operator/(int y, const month_weekday& mwd) noexcept;
constexpr year_month_weekday
operator/(const month_weekday& mwd, const year& y) noexcept;
constexpr year_month_weekday
operator/(const month_weekday& mwd, int y) noexcept;
constexpr year_month_weekday_last
operator/(const year_month& ym, const weekday_last& wdl) noexcept;
constexpr year_month_weekday_last
operator/(const year& y, const month_weekday_last& mwdl) noexcept;
constexpr year_month_weekday_last
operator/(int y, const month_weekday_last& mwdl) noexcept;
constexpr year_month_weekday_last
operator/(const month_weekday_last& mwdl, const year& y) noexcept;
constexpr year_month_weekday_last
operator/(const month_weekday_last& mwdl, int y) noexcept;
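// For illustration (not part of the synopsis): these operators chain, so
// 2024y/February/last is a year_month_day_last and May/Sunday[1]/2024y is
// a year_month_weekday.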
// 26.9, class template hh_mm_ss
template <class Duration>
class hh_mm_ss
{
bool is_neg; // exposition only
chrono::hours h; // exposition only
chrono::minutes m; // exposition only
chrono::seconds s; // exposition only
precision ss; // exposition only
public:
static unsigned constexpr fractional_width = see below;
using precision = see below;
constexpr hh_mm_ss() noexcept : hh_mm_ss{Duration::zero()} {}
constexpr explicit hh_mm_ss(Duration d) noexcept;
constexpr bool is_negative() const noexcept;
constexpr chrono::hours hours() const noexcept;
constexpr chrono::minutes minutes() const noexcept;
constexpr chrono::seconds seconds() const noexcept;
constexpr precision subseconds() const noexcept;
constexpr explicit operator precision() const noexcept;
constexpr precision to_duration() const noexcept;
};
template<class charT, class traits, class Duration>
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const hh_mm_ss<Duration>& hms); // C++20
// 26.10, 12/24 hour functions
constexpr bool is_am(hours const& h) noexcept;
constexpr bool is_pm(hours const& h) noexcept;
constexpr hours make12(const hours& h) noexcept;
constexpr hours make24(const hours& h, bool is_pm) noexcept;
// [time.zone.db], time zone database
struct tzdb { // C++20
string version;
vector<time_zone> zones;
vector<time_zone_link> links;
vector<leap_second> leap_seconds;
const time_zone* locate_zone(string_view tz_name) const;
const time_zone* current_zone() const;
};
class tzdb_list { // C++20
public:
tzdb_list(const tzdb_list&) = delete;
tzdb_list& operator=(const tzdb_list&) = delete;
// unspecified additional constructors
class const_iterator;
const tzdb& front() const noexcept;
const_iterator erase_after(const_iterator p);
const_iterator begin() const noexcept;
const_iterator end() const noexcept;
const_iterator cbegin() const noexcept;
const_iterator cend() const noexcept;
};
// [time.zone.db.access], time zone database access
const tzdb& get_tzdb(); // C++20
tzdb_list& get_tzdb_list(); // C++20
const time_zone* locate_zone(string_view tz_name); // C++20
const time_zone* current_zone(); // C++20
// [time.zone.db.remote], remote time zone database support
const tzdb& reload_tzdb(); // C++20
string remote_version(); // C++20
// [time.zone.exception], exception classes
class nonexistent_local_time; // C++20
class ambiguous_local_time; // C++20
// [time.zone.info], information classes
struct sys_info { // C++20
sys_seconds begin;
sys_seconds end;
seconds offset;
minutes save;
string abbrev;
};
template<class charT, class traits> // C++20
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const sys_info& si);
struct local_info { // C++20
static constexpr int unique = 0;
static constexpr int nonexistent = 1;
static constexpr int ambiguous = 2;
int result;
sys_info first;
sys_info second;
};
template<class charT, class traits> // C++20
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os, const local_info& li);
// 25.10.5, class time_zone // C++20
enum class choose {earliest, latest};
class time_zone {
time_zone(time_zone&&) = default;
time_zone& operator=(time_zone&&) = default;
// unspecified additional constructors
string_view name() const noexcept;
template<class Duration>
sys_info get_info(const sys_time<Duration>& st) const;
template<class Duration>
local_info get_info(const local_time<Duration>& tp) const;
template<class Duration>
sys_time<common_type_t<Duration, seconds>>
to_sys(const local_time<Duration>& tp) const;
template<class Duration>
sys_time<common_type_t<Duration, seconds>>
to_sys(const local_time<Duration>& tp, choose z) const;
template<class Duration>
local_time<common_type_t<Duration, seconds>>
to_local(const sys_time<Duration>& tp) const;
};
bool operator==(const time_zone& x, const time_zone& y) noexcept; // C++20
strong_ordering operator<=>(const time_zone& x, const time_zone& y) noexcept; // C++20
// [time.zone.zonedtraits], class template zoned_traits
template<class T> struct zoned_traits; // C++20
// [time.zone.zonedtime], class template zoned_time
template<class Duration, class TimeZonePtr = const time_zone*> // C++20
class zoned_time;
using zoned_seconds = zoned_time<seconds>; // C++20
template<class Duration1, class Duration2, class TimeZonePtr> // C++20
bool operator==(const zoned_time<Duration1, TimeZonePtr>& x,
const zoned_time<Duration2, TimeZonePtr>& y);
template<class charT, class traits, class Duration, class TimeZonePtr> // C++20
basic_ostream<charT, traits>&
operator<<(basic_ostream<charT, traits>& os,
const zoned_time<Duration, TimeZonePtr>& t);
// [time.zone.leap], leap second support
class leap_second { // C++20
public:
leap_second(const leap_second&) = default;
leap_second& operator=(const leap_second&) = default;
// unspecified additional constructors
constexpr sys_seconds date() const noexcept;
constexpr seconds value() const noexcept;
};
constexpr bool operator==(const leap_second& x, const leap_second& y); // C++20
constexpr strong_ordering operator<=>(const leap_second& x, const leap_second& y);
template<class Duration> // C++20
constexpr bool operator==(const leap_second& x, const sys_time<Duration>& y);
template<class Duration> // C++20
constexpr bool operator< (const leap_second& x, const sys_time<Duration>& y);
template<class Duration> // C++20
constexpr bool operator< (const sys_time<Duration>& x, const leap_second& y);
template<class Duration> // C++20
constexpr bool operator> (const leap_second& x, const sys_time<Duration>& y);
template<class Duration> // C++20
constexpr bool operator> (const sys_time<Duration>& x, const leap_second& y);
template<class Duration> // C++20
constexpr bool operator<=(const leap_second& x, const sys_time<Duration>& y);
template<class Duration> // C++20
constexpr bool operator<=(const sys_time<Duration>& x, const leap_second& y);
template<class Duration> // C++20
constexpr bool operator>=(const leap_second& x, const sys_time<Duration>& y);
template<class Duration> // C++20
constexpr bool operator>=(const sys_time<Duration>& x, const leap_second& y);
template<class Duration> // C++20
requires three_way_comparable_with<sys_seconds, sys_time<Duration>>
constexpr auto operator<=>(const leap_second& x, const sys_time<Duration>& y);
// [time.zone.link], class time_zone_link
class time_zone_link { // C++20
public:
time_zone_link(time_zone_link&&) = default;
time_zone_link& operator=(time_zone_link&&) = default;
// unspecified additional constructors
string_view name() const noexcept;
string_view target() const noexcept;
};
bool operator==(const time_zone_link& x, const time_zone_link& y); // C++20
strong_ordering operator<=>(const time_zone_link& x, const time_zone_link& y); // C++20
} // chrono
namespace std {
template<class Duration, class charT>
struct formatter<chrono::sys_time<Duration>, charT>; // C++20
template<class Duration, class charT>
struct formatter<chrono::file_time<Duration>, charT>; // C++20
template<class Duration, class charT>
struct formatter<chrono::local_time<Duration>, charT>; // C++20
template<class Rep, class Period, class charT>
struct formatter<chrono::duration<Rep, Period>, charT>; // C++20
template<class charT> struct formatter<chrono::day, charT>; // C++20
template<class charT> struct formatter<chrono::month, charT>; // C++20
template<class charT> struct formatter<chrono::year, charT>; // C++20
template<class charT> struct formatter<chrono::weekday, charT>; // C++20
template<class charT> struct formatter<chrono::weekday_indexed, charT>; // C++20
template<class charT> struct formatter<chrono::weekday_last, charT>; // C++20
template<class charT> struct formatter<chrono::month_day, charT>; // C++20
template<class charT> struct formatter<chrono::month_day_last, charT>; // C++20
template<class charT> struct formatter<chrono::month_weekday, charT>; // C++20
template<class charT> struct formatter<chrono::month_weekday_last, charT>; // C++20
template<class charT> struct formatter<chrono::year_month, charT>; // C++20
template<class charT> struct formatter<chrono::year_month_day, charT>; // C++20
template<class charT> struct formatter<chrono::year_month_day_last, charT>; // C++20
template<class charT> struct formatter<chrono::year_month_weekday, charT>; // C++20
template<class charT> struct formatter<chrono::year_month_weekday_last, charT>; // C++20
template<class Rep, class Period, class charT>
struct formatter<chrono::hh_mm_ss<duration<Rep, Period>>, charT>; // C++20
template<class charT> struct formatter<chrono::sys_info, charT>; // C++20
template<class charT> struct formatter<chrono::local_info, charT>; // C++20
template<class Duration, class TimeZonePtr, class charT> // C++20
struct formatter<chrono::zoned_time<Duration, TimeZonePtr>, charT>;
} // namespace std
namespace chrono {
// calendrical constants
inline constexpr last_spec last{}; // C++20
inline constexpr chrono::weekday Sunday{0}; // C++20
inline constexpr chrono::weekday Monday{1}; // C++20
inline constexpr chrono::weekday Tuesday{2}; // C++20
inline constexpr chrono::weekday Wednesday{3}; // C++20
inline constexpr chrono::weekday Thursday{4}; // C++20
inline constexpr chrono::weekday Friday{5}; // C++20
inline constexpr chrono::weekday Saturday{6}; // C++20
inline constexpr chrono::month January{1}; // C++20
inline constexpr chrono::month February{2}; // C++20
inline constexpr chrono::month March{3}; // C++20
inline constexpr chrono::month April{4}; // C++20
inline constexpr chrono::month May{5}; // C++20
inline constexpr chrono::month June{6}; // C++20
inline constexpr chrono::month July{7}; // C++20
inline constexpr chrono::month August{8}; // C++20
inline constexpr chrono::month September{9}; // C++20
inline constexpr chrono::month October{10}; // C++20
inline constexpr chrono::month November{11}; // C++20
inline constexpr chrono::month December{12}; // C++20
} // chrono
inline namespace literals {
inline namespace chrono_literals {
constexpr chrono::hours operator ""h(unsigned long long); // C++14
constexpr chrono::duration<unspecified , ratio<3600,1>> operator ""h(long double); // C++14
constexpr chrono::minutes operator ""min(unsigned long long); // C++14
constexpr chrono::duration<unspecified , ratio<60,1>> operator ""min(long double); // C++14
constexpr chrono::seconds operator ""s(unsigned long long); // C++14
constexpr chrono::duration<unspecified > operator ""s(long double); // C++14
constexpr chrono::milliseconds operator ""ms(unsigned long long); // C++14
constexpr chrono::duration<unspecified , milli> operator ""ms(long double); // C++14
constexpr chrono::microseconds operator ""us(unsigned long long); // C++14
constexpr chrono::duration<unspecified , micro> operator ""us(long double); // C++14
constexpr chrono::nanoseconds operator ""ns(unsigned long long); // C++14
constexpr chrono::duration<unspecified , nano> operator ""ns(long double); // C++14
constexpr chrono::day operator ""d(unsigned long long d) noexcept; // C++20
constexpr chrono::year operator ""y(unsigned long long y) noexcept; // C++20
} // chrono_literals
} // literals
} // std
*/
// clang-format on
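// Illustrative usage of the literal operators in the synopsis above (a
// sketch, not part of the header itself): with
// `using namespace std::chrono_literals;`, the expression `1h + 30min + 15s`
// has a common duration type convertible to seconds (5415s), and the C++20
// calendar literals compose as `2024y / std::chrono::January / 15d` to form
// a year_month_day.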
#include <__config>
#include <__chrono/duration.h>
#include <__chrono/file_clock.h>
#include <__chrono/high_resolution_clock.h>
#include <__chrono/steady_clock.h>
#include <__chrono/system_clock.h>
#include <__chrono/time_point.h>
#if _LIBCPP_STD_VER >= 20
# include <__chrono/calendar.h>
# include <__chrono/day.h>
# include <__chrono/exception.h>
# include <__chrono/hh_mm_ss.h>
# include <__chrono/literals.h>
# include <__chrono/local_info.h>
# include <__chrono/month.h>
# include <__chrono/month_weekday.h>
# include <__chrono/monthday.h>
# include <__chrono/sys_info.h>
# include <__chrono/weekday.h>
# include <__chrono/year.h>
# include <__chrono/year_month.h>
# include <__chrono/year_month_day.h>
# include <__chrono/year_month_weekday.h>
# if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
# include <__chrono/formatter.h>
# include <__chrono/ostream.h>
# include <__chrono/parser_std_format_spec.h>
# include <__chrono/statically_widen.h>
# endif
# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \
!defined(_LIBCPP_HAS_NO_LOCALIZATION)
# include <__chrono/leap_second.h>
# include <__chrono/time_zone.h>
# include <__chrono/time_zone_link.h>
# include <__chrono/tzdb.h>
# include <__chrono/tzdb_list.h>
# include <__chrono/zoned_time.h>
# endif
#endif
#include <version>
// standard-mandated includes
// [time.syn]
#include <compare>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 17
# include <cstdint>
# include <stdexcept>
# include <string_view>
# include <vector>
#endif
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
# include <bit>
# include <concepts>
# include <cstring>
# include <forward_list>
# include <string>
# include <tuple>
#endif
#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER == 20
# include <charconv>
# if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
# include <locale>
+# include <ostream>
# endif
-# include <ostream>
#endif
#endif // _LIBCPP_CHRONO
diff --git a/contrib/llvm-project/lld/ELF/Arch/Hexagon.cpp b/contrib/llvm-project/lld/ELF/Arch/Hexagon.cpp
index 54821c299bde..abde3cd96491 100644
--- a/contrib/llvm-project/lld/ELF/Arch/Hexagon.cpp
+++ b/contrib/llvm-project/lld/ELF/Arch/Hexagon.cpp
@@ -1,412 +1,410 @@
//===-- Hexagon.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
namespace {
class Hexagon final : public TargetInfo {
public:
Hexagon();
uint32_t calcEFlags() const override;
RelExpr getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const override;
RelType getDynRel(RelType type) const override;
int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
void writePltHeader(uint8_t *buf) const override;
void writePlt(uint8_t *buf, const Symbol &sym,
uint64_t pltEntryAddr) const override;
};
} // namespace
Hexagon::Hexagon() {
pltRel = R_HEX_JMP_SLOT;
relativeRel = R_HEX_RELATIVE;
gotRel = R_HEX_GLOB_DAT;
symbolicRel = R_HEX_32;
gotBaseSymInGotPlt = true;
// The zeroth GOT entry is reserved for the address of _DYNAMIC. The
// next 3 are reserved for the dynamic loader.
gotPltHeaderEntriesNum = 4;
pltEntrySize = 16;
pltHeaderSize = 32;
// Hexagon Linux uses 64K pages by default.
defaultMaxPageSize = 0x10000;
tlsGotRel = R_HEX_TPREL_32;
tlsModuleIndexRel = R_HEX_DTPMOD_32;
tlsOffsetRel = R_HEX_DTPREL_32;
}
uint32_t Hexagon::calcEFlags() const {
- assert(!ctx.objectFiles.empty());
-
// The architecture revision must always be equal to or greater than
// the greatest revision in the list of inputs.
- uint32_t ret = 0;
+ std::optional<uint32_t> ret;
for (InputFile *f : ctx.objectFiles) {
uint32_t eflags = cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
- if (eflags > ret)
+ if (!ret || eflags > *ret)
ret = eflags;
}
- return ret;
+ return ret.value_or(/* Default Arch Rev: */ 0x60);
}
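// Illustrative note, not upstream commentary: if the inputs carry e_flags
// encoding arch revisions 0x62 and 0x65, the loop above returns 0x65; with
// no object files at all (e.g. a link driven purely by a linker script),
// the value_or fallback picks 0x60, the default revision named in the
// return statement.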
static uint32_t applyMask(uint32_t mask, uint32_t data) {
uint32_t result = 0;
size_t off = 0;
for (size_t bit = 0; bit != 32; ++bit) {
uint32_t valBit = (data >> off) & 1;
uint32_t maskBit = (mask >> bit) & 1;
if (maskBit) {
result |= (valBit << bit);
++off;
}
}
return result;
}
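// Worked example (illustrative only): applyMask(0x0000201f, 0x2a) scatters
// the low bits of `data` into the set-bit positions of `mask`, lowest mask
// bit first. mask 0x0000201f has bits {0,1,2,3,4,13} set, and data 0b101010
// fills them in order, so the result is (1 << 1) | (1 << 3) | (1 << 13)
// == 0x200a.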
RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const {
switch (type) {
case R_HEX_NONE:
return R_NONE;
case R_HEX_6_X:
case R_HEX_8_X:
case R_HEX_9_X:
case R_HEX_10_X:
case R_HEX_11_X:
case R_HEX_12_X:
case R_HEX_16_X:
case R_HEX_32:
case R_HEX_32_6_X:
case R_HEX_HI16:
case R_HEX_LO16:
case R_HEX_DTPREL_32:
return R_ABS;
case R_HEX_B9_PCREL:
case R_HEX_B13_PCREL:
case R_HEX_B15_PCREL:
case R_HEX_6_PCREL_X:
case R_HEX_32_PCREL:
return R_PC;
case R_HEX_B9_PCREL_X:
case R_HEX_B15_PCREL_X:
case R_HEX_B22_PCREL:
case R_HEX_PLT_B22_PCREL:
case R_HEX_B22_PCREL_X:
case R_HEX_B32_PCREL_X:
case R_HEX_GD_PLT_B22_PCREL:
case R_HEX_GD_PLT_B22_PCREL_X:
case R_HEX_GD_PLT_B32_PCREL_X:
return R_PLT_PC;
case R_HEX_IE_32_6_X:
case R_HEX_IE_16_X:
case R_HEX_IE_HI16:
case R_HEX_IE_LO16:
return R_GOT;
case R_HEX_GD_GOT_11_X:
case R_HEX_GD_GOT_16_X:
case R_HEX_GD_GOT_32_6_X:
return R_TLSGD_GOTPLT;
case R_HEX_GOTREL_11_X:
case R_HEX_GOTREL_16_X:
case R_HEX_GOTREL_32_6_X:
case R_HEX_GOTREL_HI16:
case R_HEX_GOTREL_LO16:
return R_GOTPLTREL;
case R_HEX_GOT_11_X:
case R_HEX_GOT_16_X:
case R_HEX_GOT_32_6_X:
return R_GOTPLT;
case R_HEX_IE_GOT_11_X:
case R_HEX_IE_GOT_16_X:
case R_HEX_IE_GOT_32_6_X:
case R_HEX_IE_GOT_HI16:
case R_HEX_IE_GOT_LO16:
return R_GOTPLT;
case R_HEX_TPREL_11_X:
case R_HEX_TPREL_16:
case R_HEX_TPREL_16_X:
case R_HEX_TPREL_32_6_X:
case R_HEX_TPREL_HI16:
case R_HEX_TPREL_LO16:
return R_TPREL;
default:
error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
") against symbol " + toString(s));
return R_NONE;
}
}
// There are (arguably too) many relocation masks for the DSP's
// R_HEX_6_X type. The table below is used to select the correct mask
// for the given instruction.
struct InstructionMask {
uint32_t cmpMask;
uint32_t relocMask;
};
static const InstructionMask r6[] = {
{0x38000000, 0x0000201f}, {0x39000000, 0x0000201f},
{0x3e000000, 0x00001f80}, {0x3f000000, 0x00001f80},
{0x40000000, 0x000020f8}, {0x41000000, 0x000007e0},
{0x42000000, 0x000020f8}, {0x43000000, 0x000007e0},
{0x44000000, 0x000020f8}, {0x45000000, 0x000007e0},
{0x46000000, 0x000020f8}, {0x47000000, 0x000007e0},
{0x6a000000, 0x00001f80}, {0x7c000000, 0x001f2000},
{0x9a000000, 0x00000f60}, {0x9b000000, 0x00000f60},
{0x9c000000, 0x00000f60}, {0x9d000000, 0x00000f60},
{0x9f000000, 0x001f0100}, {0xab000000, 0x0000003f},
{0xad000000, 0x0000003f}, {0xaf000000, 0x00030078},
{0xd7000000, 0x006020e0}, {0xd8000000, 0x006020e0},
{0xdb000000, 0x006020e0}, {0xdf000000, 0x006020e0}};
static bool isDuplex(uint32_t insn) {
// Duplex forms have a fixed mask and parse bits 15:14 are always
// zero. Non-duplex insns will always have at least one bit set in the
// parse field.
return (0xC000 & insn) == 0;
}
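// Illustrative only (instruction words invented): 0x28003000 has bits 15:14
// clear, so isDuplex returns true; 0x78004000 has bit 14 set and is treated
// as a regular insn.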
static uint32_t findMaskR6(uint32_t insn) {
if (isDuplex(insn))
return 0x03f00000;
for (InstructionMask i : r6)
if ((0xff000000 & insn) == i.cmpMask)
return i.relocMask;
error("unrecognized instruction for 6_X relocation: 0x" +
utohexstr(insn));
return 0;
}
static uint32_t findMaskR8(uint32_t insn) {
if ((0xff000000 & insn) == 0xde000000)
return 0x00e020e8;
if ((0xff000000 & insn) == 0x3c000000)
return 0x0000207f;
return 0x00001fe0;
}
static uint32_t findMaskR11(uint32_t insn) {
if ((0xff000000 & insn) == 0xa1000000)
return 0x060020ff;
return 0x06003fe0;
}
static uint32_t findMaskR16(uint32_t insn) {
if ((0xff000000 & insn) == 0x48000000)
return 0x061f20ff;
if ((0xff000000 & insn) == 0x49000000)
return 0x061f3fe0;
if ((0xff000000 & insn) == 0x78000000)
return 0x00df3fe0;
if ((0xff000000 & insn) == 0xb0000000)
return 0x0fe03fe0;
if (isDuplex(insn))
return 0x03f00000;
for (InstructionMask i : r6)
if ((0xff000000 & insn) == i.cmpMask)
return i.relocMask;
error("unrecognized instruction for 16_X type: 0x" +
utohexstr(insn));
return 0;
}
static void or32le(uint8_t *p, int32_t v) { write32le(p, read32le(p) | v); }
void Hexagon::relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const {
switch (rel.type) {
case R_HEX_NONE:
break;
case R_HEX_6_PCREL_X:
case R_HEX_6_X:
or32le(loc, applyMask(findMaskR6(read32le(loc)), val));
break;
case R_HEX_8_X:
or32le(loc, applyMask(findMaskR8(read32le(loc)), val));
break;
case R_HEX_9_X:
or32le(loc, applyMask(0x00003fe0, val & 0x3f));
break;
case R_HEX_10_X:
or32le(loc, applyMask(0x00203fe0, val & 0x3f));
break;
case R_HEX_11_X:
case R_HEX_GD_GOT_11_X:
case R_HEX_IE_GOT_11_X:
case R_HEX_GOT_11_X:
case R_HEX_GOTREL_11_X:
case R_HEX_TPREL_11_X:
or32le(loc, applyMask(findMaskR11(read32le(loc)), val & 0x3f));
break;
case R_HEX_12_X:
or32le(loc, applyMask(0x000007e0, val));
break;
case R_HEX_16_X: // These relocs only have 6 effective bits.
case R_HEX_IE_16_X:
case R_HEX_IE_GOT_16_X:
case R_HEX_GD_GOT_16_X:
case R_HEX_GOT_16_X:
case R_HEX_GOTREL_16_X:
case R_HEX_TPREL_16_X:
or32le(loc, applyMask(findMaskR16(read32le(loc)), val & 0x3f));
break;
case R_HEX_TPREL_16:
or32le(loc, applyMask(findMaskR16(read32le(loc)), val & 0xffff));
break;
case R_HEX_32:
case R_HEX_32_PCREL:
case R_HEX_DTPREL_32:
or32le(loc, val);
break;
case R_HEX_32_6_X:
case R_HEX_GD_GOT_32_6_X:
case R_HEX_GOT_32_6_X:
case R_HEX_GOTREL_32_6_X:
case R_HEX_IE_GOT_32_6_X:
case R_HEX_IE_32_6_X:
case R_HEX_TPREL_32_6_X:
or32le(loc, applyMask(0x0fff3fff, val >> 6));
break;
case R_HEX_B9_PCREL:
checkInt(loc, val, 11, rel);
or32le(loc, applyMask(0x003000fe, val >> 2));
break;
case R_HEX_B9_PCREL_X:
or32le(loc, applyMask(0x003000fe, val & 0x3f));
break;
case R_HEX_B13_PCREL:
checkInt(loc, val, 15, rel);
or32le(loc, applyMask(0x00202ffe, val >> 2));
break;
case R_HEX_B15_PCREL:
checkInt(loc, val, 17, rel);
or32le(loc, applyMask(0x00df20fe, val >> 2));
break;
case R_HEX_B15_PCREL_X:
or32le(loc, applyMask(0x00df20fe, val & 0x3f));
break;
case R_HEX_B22_PCREL:
case R_HEX_GD_PLT_B22_PCREL:
case R_HEX_PLT_B22_PCREL:
checkInt(loc, val, 22, rel);
or32le(loc, applyMask(0x1ff3ffe, val >> 2));
break;
case R_HEX_B22_PCREL_X:
case R_HEX_GD_PLT_B22_PCREL_X:
or32le(loc, applyMask(0x1ff3ffe, val & 0x3f));
break;
case R_HEX_B32_PCREL_X:
case R_HEX_GD_PLT_B32_PCREL_X:
or32le(loc, applyMask(0x0fff3fff, val >> 6));
break;
case R_HEX_GOTREL_HI16:
case R_HEX_HI16:
case R_HEX_IE_GOT_HI16:
case R_HEX_IE_HI16:
case R_HEX_TPREL_HI16:
or32le(loc, applyMask(0x00c03fff, val >> 16));
break;
case R_HEX_GOTREL_LO16:
case R_HEX_LO16:
case R_HEX_IE_GOT_LO16:
case R_HEX_IE_LO16:
case R_HEX_TPREL_LO16:
or32le(loc, applyMask(0x00c03fff, val));
break;
default:
llvm_unreachable("unknown relocation");
}
}
void Hexagon::writePltHeader(uint8_t *buf) const {
const uint8_t pltData[] = {
0x00, 0x40, 0x00, 0x00, // { immext (#0)
0x1c, 0xc0, 0x49, 0x6a, // r28 = add (pc, ##GOT0@PCREL) } # @GOT0
0x0e, 0x42, 0x9c, 0xe2, // { r14 -= add (r28, #16) # offset of GOTn
0x4f, 0x40, 0x9c, 0x91, // r15 = memw (r28 + #8) # object ID at GOT2
0x3c, 0xc0, 0x9c, 0x91, // r28 = memw (r28 + #4) }# dynamic link at GOT1
0x0e, 0x42, 0x0e, 0x8c, // { r14 = asr (r14, #2) # index of PLTn
0x00, 0xc0, 0x9c, 0x52, // jumpr r28 } # call dynamic linker
0x0c, 0xdb, 0x00, 0x54, // trap0(#0xdb) # bring plt0 into 16-byte alignment
};
memcpy(buf, pltData, sizeof(pltData));
// Offset from PLT0 to the GOT.
uint64_t off = in.gotPlt->getVA() - in.plt->getVA();
relocateNoSym(buf, R_HEX_B32_PCREL_X, off);
relocateNoSym(buf + 4, R_HEX_6_PCREL_X, off);
}
void Hexagon::writePlt(uint8_t *buf, const Symbol &sym,
uint64_t pltEntryAddr) const {
const uint8_t inst[] = {
0x00, 0x40, 0x00, 0x00, // { immext (#0)
0x0e, 0xc0, 0x49, 0x6a, // r14 = add (pc, ##GOTn@PCREL) }
0x1c, 0xc0, 0x8e, 0x91, // r28 = memw (r14)
0x00, 0xc0, 0x9c, 0x52, // jumpr r28
};
memcpy(buf, inst, sizeof(inst));
uint64_t gotPltEntryAddr = sym.getGotPltVA();
relocateNoSym(buf, R_HEX_B32_PCREL_X, gotPltEntryAddr - pltEntryAddr);
relocateNoSym(buf + 4, R_HEX_6_PCREL_X, gotPltEntryAddr - pltEntryAddr);
}
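// Illustrative only (addresses invented): for a PLT entry at 0x1000 whose
// GOTPLT slot sits at 0x3008, the delta 0x2008 is split across the pair of
// relocations above: R_HEX_B32_PCREL_X plants the upper bits (delta >> 6)
// in the immext word and R_HEX_6_PCREL_X fixes up the paired add.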
RelType Hexagon::getDynRel(RelType type) const {
if (type == R_HEX_32)
return type;
return R_HEX_NONE;
}
int64_t Hexagon::getImplicitAddend(const uint8_t *buf, RelType type) const {
switch (type) {
case R_HEX_NONE:
case R_HEX_GLOB_DAT:
case R_HEX_JMP_SLOT:
return 0;
case R_HEX_32:
case R_HEX_RELATIVE:
case R_HEX_DTPMOD_32:
case R_HEX_DTPREL_32:
case R_HEX_TPREL_32:
return SignExtend64<32>(read32(buf));
default:
internalLinkerError(getErrorLocation(buf),
"cannot read addend for relocation " + toString(type));
return 0;
}
}
TargetInfo *elf::getHexagonTargetInfo() {
static Hexagon target;
return &target;
}
diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/X86TargetParser.def b/contrib/llvm-project/llvm/include/llvm/TargetParser/X86TargetParser.def
index 92798cbe4b4c..008cf5381c12 100644
--- a/contrib/llvm-project/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -1,274 +1,277 @@
//===- X86TargetParser.def - X86 target parsing defines ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides defines to build up the X86 target parser's logic.
//
//===----------------------------------------------------------------------===//
// NOTE: NO INCLUDE GUARD DESIRED!
#ifndef X86_VENDOR
#define X86_VENDOR(ENUM, STR)
#endif
X86_VENDOR(VENDOR_INTEL, "intel")
X86_VENDOR(VENDOR_AMD, "amd")
#undef X86_VENDOR
// This macro is used for cpu types present in compiler-rt/libgcc.
#ifndef X86_CPU_TYPE
#define X86_CPU_TYPE(ENUM, STR)
#endif
#ifndef X86_CPU_TYPE_ALIAS
#define X86_CPU_TYPE_ALIAS(ENUM, STR)
#endif
// This list must match what is implemented in libgcc and compiler-rt. Clang
// uses this to know how to implement __builtin_cpu_is.
X86_CPU_TYPE(INTEL_BONNELL, "bonnell")
X86_CPU_TYPE(INTEL_CORE2, "core2")
X86_CPU_TYPE(INTEL_COREI7, "corei7")
X86_CPU_TYPE(AMDFAM10H, "amdfam10h")
X86_CPU_TYPE(AMDFAM15H, "amdfam15h")
X86_CPU_TYPE(INTEL_SILVERMONT, "silvermont")
X86_CPU_TYPE(INTEL_KNL, "knl")
X86_CPU_TYPE(AMD_BTVER1, "btver1")
X86_CPU_TYPE(AMD_BTVER2, "btver2")
X86_CPU_TYPE(AMDFAM17H, "amdfam17h")
X86_CPU_TYPE(INTEL_KNM, "knm")
X86_CPU_TYPE(INTEL_GOLDMONT, "goldmont")
X86_CPU_TYPE(INTEL_GOLDMONT_PLUS, "goldmont-plus")
X86_CPU_TYPE(INTEL_TREMONT, "tremont")
X86_CPU_TYPE(AMDFAM19H, "amdfam19h")
X86_CPU_TYPE(ZHAOXIN_FAM7H, "zhaoxin_fam7h")
X86_CPU_TYPE(INTEL_SIERRAFOREST, "sierraforest")
X86_CPU_TYPE(INTEL_GRANDRIDGE, "grandridge")
X86_CPU_TYPE(INTEL_CLEARWATERFOREST, "clearwaterforest")
+X86_CPU_TYPE(AMDFAM1AH, "amdfam1ah")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom")
X86_CPU_TYPE_ALIAS(AMDFAM10H, "amdfam10")
X86_CPU_TYPE_ALIAS(AMDFAM15H, "amdfam15")
+X86_CPU_TYPE_ALIAS(AMDFAM1AH, "amdfam1a")
X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm")
#undef X86_CPU_TYPE_ALIAS
#undef X86_CPU_TYPE
// This macro is used for cpu subtypes present in compiler-rt/libgcc.
#ifndef X86_CPU_SUBTYPE
#define X86_CPU_SUBTYPE(ENUM, STR)
#endif
#ifndef X86_CPU_SUBTYPE_ALIAS
#define X86_CPU_SUBTYPE_ALIAS(ENUM, STR)
#endif
// This list must match what is implemented in libgcc and compiler-rt. Clang
// uses this to know how to implement __builtin_cpu_is.
X86_CPU_SUBTYPE(INTEL_COREI7_NEHALEM, "nehalem")
X86_CPU_SUBTYPE(INTEL_COREI7_WESTMERE, "westmere")
X86_CPU_SUBTYPE(INTEL_COREI7_SANDYBRIDGE, "sandybridge")
X86_CPU_SUBTYPE(AMDFAM10H_BARCELONA, "barcelona")
X86_CPU_SUBTYPE(AMDFAM10H_SHANGHAI, "shanghai")
X86_CPU_SUBTYPE(AMDFAM10H_ISTANBUL, "istanbul")
X86_CPU_SUBTYPE(AMDFAM15H_BDVER1, "bdver1")
X86_CPU_SUBTYPE(AMDFAM15H_BDVER2, "bdver2")
X86_CPU_SUBTYPE(AMDFAM15H_BDVER3, "bdver3")
X86_CPU_SUBTYPE(AMDFAM15H_BDVER4, "bdver4")
X86_CPU_SUBTYPE(AMDFAM17H_ZNVER1, "znver1")
X86_CPU_SUBTYPE(INTEL_COREI7_IVYBRIDGE, "ivybridge")
X86_CPU_SUBTYPE(INTEL_COREI7_HASWELL, "haswell")
X86_CPU_SUBTYPE(INTEL_COREI7_BROADWELL, "broadwell")
X86_CPU_SUBTYPE(INTEL_COREI7_SKYLAKE, "skylake")
X86_CPU_SUBTYPE(INTEL_COREI7_SKYLAKE_AVX512, "skylake-avx512")
X86_CPU_SUBTYPE(INTEL_COREI7_CANNONLAKE, "cannonlake")
X86_CPU_SUBTYPE(INTEL_COREI7_ICELAKE_CLIENT, "icelake-client")
X86_CPU_SUBTYPE(INTEL_COREI7_ICELAKE_SERVER, "icelake-server")
X86_CPU_SUBTYPE(AMDFAM17H_ZNVER2, "znver2")
X86_CPU_SUBTYPE(INTEL_COREI7_CASCADELAKE, "cascadelake")
X86_CPU_SUBTYPE(INTEL_COREI7_TIGERLAKE, "tigerlake")
X86_CPU_SUBTYPE(INTEL_COREI7_COOPERLAKE, "cooperlake")
X86_CPU_SUBTYPE(INTEL_COREI7_SAPPHIRERAPIDS, "sapphirerapids")
X86_CPU_SUBTYPE(INTEL_COREI7_ALDERLAKE, "alderlake")
X86_CPU_SUBTYPE(AMDFAM19H_ZNVER3, "znver3")
X86_CPU_SUBTYPE(INTEL_COREI7_ROCKETLAKE, "rocketlake")
X86_CPU_SUBTYPE(ZHAOXIN_FAM7H_LUJIAZUI, "zhaoxin_fam7h_lujiazui")
X86_CPU_SUBTYPE(AMDFAM19H_ZNVER4, "znver4")
X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS, "graniterapids")
X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS_D,"graniterapids-d")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE, "arrowlake")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S, "arrowlake-s")
X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE, "pantherlake")
+X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5, "znver5")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "meteorlake")
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_SAPPHIRERAPIDS, "emeraldrapids")
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ARROWLAKE_S,"lunarlake")
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "gracemont")
#undef X86_CPU_SUBTYPE_ALIAS
#undef X86_CPU_SUBTYPE
// This macro is used for cpu features present in compiler-rt/libgcc. The third
// parameter PRIORITY is as required by the attribute 'target' checking. Note
// that not all are supported/prioritized by GCC, so synchronization with GCC's
// implementation may require changing some existing values.
//
// We cannot just re-sort the list though because its order is dictated by the
// order of bits in CodeGenFunction::GetX86CpuSupportsMask.
// Nor can we re-adjust the positions of the X86_FEATURE_COMPAT entries within
// the list.
#ifndef X86_FEATURE_COMPAT
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) X86_FEATURE(ENUM, STR)
#endif
#ifndef X86_FEATURE
#define X86_FEATURE(ENUM, STR)
#endif
#ifndef X86_MICROARCH_LEVEL
#define X86_MICROARCH_LEVEL(ENUM, STR, PRIORITY)
#endif
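// Illustrative only (hypothetical consumer, not part of this file): clients
// expand these X-macros by defining the hooks before inclusion, e.g.
//   #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) {STR, PRIORITY},
//   #include "llvm/TargetParser/X86TargetParser.def"
// emits one initializer per compat feature; the #undefs at the end of this
// file let it be re-included with different definitions.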
X86_FEATURE_COMPAT(CMOV, "cmov", 0)
X86_FEATURE_COMPAT(MMX, "mmx", 1)
X86_FEATURE_COMPAT(POPCNT, "popcnt", 9)
X86_FEATURE_COMPAT(SSE, "sse", 2)
X86_FEATURE_COMPAT(SSE2, "sse2", 3)
X86_FEATURE_COMPAT(SSE3, "sse3", 4)
X86_FEATURE_COMPAT(SSSE3, "ssse3", 5)
X86_FEATURE_COMPAT(SSE4_1, "sse4.1", 7)
X86_FEATURE_COMPAT(SSE4_2, "sse4.2", 8)
X86_FEATURE_COMPAT(AVX, "avx", 12)
X86_FEATURE_COMPAT(AVX2, "avx2", 18)
X86_FEATURE_COMPAT(SSE4_A, "sse4a", 6)
X86_FEATURE_COMPAT(FMA4, "fma4", 14)
X86_FEATURE_COMPAT(XOP, "xop", 15)
X86_FEATURE_COMPAT(FMA, "fma", 16)
X86_FEATURE_COMPAT(AVX512F, "avx512f", 19)
X86_FEATURE_COMPAT(BMI, "bmi", 13)
X86_FEATURE_COMPAT(BMI2, "bmi2", 17)
X86_FEATURE_COMPAT(AES, "aes", 10)
X86_FEATURE_COMPAT(PCLMUL, "pclmul", 11)
X86_FEATURE_COMPAT(AVX512VL, "avx512vl", 20)
X86_FEATURE_COMPAT(AVX512BW, "avx512bw", 21)
X86_FEATURE_COMPAT(AVX512DQ, "avx512dq", 22)
X86_FEATURE_COMPAT(AVX512CD, "avx512cd", 23)
X86_FEATURE (NF, "nf")
X86_FEATURE (CF, "cf")
X86_FEATURE_COMPAT(AVX512VBMI, "avx512vbmi", 24)
X86_FEATURE_COMPAT(AVX512IFMA, "avx512ifma", 25)
X86_FEATURE_COMPAT(AVX5124VNNIW, "avx5124vnniw", 26)
X86_FEATURE_COMPAT(AVX5124FMAPS, "avx5124fmaps", 27)
X86_FEATURE_COMPAT(AVX512VPOPCNTDQ, "avx512vpopcntdq", 28)
X86_FEATURE_COMPAT(AVX512VBMI2, "avx512vbmi2", 29)
X86_FEATURE_COMPAT(GFNI, "gfni", 30)
X86_FEATURE_COMPAT(VPCLMULQDQ, "vpclmulqdq", 31)
X86_FEATURE_COMPAT(AVX512VNNI, "avx512vnni", 32)
X86_FEATURE_COMPAT(AVX512BITALG, "avx512bitalg", 33)
X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16", 34)
X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect", 35)
// The features below are missing some entries relative to gcc because some
// gcc features do not map one-to-one onto llvm features.
// FIXME: dummy features were added to keep the numeric values of later features
// stable. Since the values need to be ABI stable, they should be changed to
// have explicitly assigned values, and then these dummy features removed.
X86_FEATURE (DUMMYFEATURE1, "__dummyfeature1")
X86_FEATURE (DUMMYFEATURE2, "__dummyfeature2")
X86_FEATURE_COMPAT(ADX, "adx", 0)
X86_FEATURE (64BIT, "64bit")
X86_FEATURE_COMPAT(CLDEMOTE, "cldemote", 0)
X86_FEATURE_COMPAT(CLFLUSHOPT, "clflushopt", 0)
X86_FEATURE_COMPAT(CLWB, "clwb", 0)
X86_FEATURE_COMPAT(CLZERO, "clzero", 0)
X86_FEATURE_COMPAT(CMPXCHG16B, "cx16", 0)
X86_FEATURE (CMPXCHG8B, "cx8")
X86_FEATURE_COMPAT(ENQCMD, "enqcmd", 0)
X86_FEATURE_COMPAT(F16C, "f16c", 0)
X86_FEATURE_COMPAT(FSGSBASE, "fsgsbase", 0)
X86_FEATURE (CRC32, "crc32")
X86_FEATURE (INVPCID, "invpcid")
X86_FEATURE (RDPRU, "rdpru")
X86_FEATURE (SAHF, "sahf")
X86_FEATURE (VZEROUPPER, "vzeroupper")
X86_FEATURE_COMPAT(LWP, "lwp", 0)
X86_FEATURE_COMPAT(LZCNT, "lzcnt", 0)
X86_FEATURE_COMPAT(MOVBE, "movbe", 0)
X86_FEATURE_COMPAT(MOVDIR64B, "movdir64b", 0)
X86_FEATURE_COMPAT(MOVDIRI, "movdiri", 0)
X86_FEATURE_COMPAT(MWAITX, "mwaitx", 0)
X86_FEATURE (X87, "x87")
X86_FEATURE_COMPAT(PCONFIG, "pconfig", 0)
X86_FEATURE_COMPAT(PKU, "pku", 0)
X86_FEATURE (EVEX512, "evex512")
X86_FEATURE_COMPAT(PRFCHW, "prfchw", 0)
X86_FEATURE_COMPAT(PTWRITE, "ptwrite", 0)
X86_FEATURE_COMPAT(RDPID, "rdpid", 0)
X86_FEATURE_COMPAT(RDRND, "rdrnd", 0)
X86_FEATURE_COMPAT(RDSEED, "rdseed", 0)
X86_FEATURE_COMPAT(RTM, "rtm", 0)
X86_FEATURE_COMPAT(SERIALIZE, "serialize", 0)
X86_FEATURE_COMPAT(SGX, "sgx", 0)
X86_FEATURE_COMPAT(SHA, "sha", 0)
X86_FEATURE_COMPAT(SHSTK, "shstk", 0)
X86_FEATURE_COMPAT(TBM, "tbm", 0)
X86_FEATURE_COMPAT(TSXLDTRK, "tsxldtrk", 0)
X86_FEATURE_COMPAT(VAES, "vaes", 0)
X86_FEATURE_COMPAT(WAITPKG, "waitpkg", 0)
X86_FEATURE_COMPAT(WBNOINVD, "wbnoinvd", 0)
X86_FEATURE_COMPAT(XSAVE, "xsave", 0)
X86_FEATURE_COMPAT(XSAVEC, "xsavec", 0)
X86_FEATURE_COMPAT(XSAVEOPT, "xsaveopt", 0)
X86_FEATURE_COMPAT(XSAVES, "xsaves", 0)
X86_FEATURE_COMPAT(AMX_TILE, "amx-tile", 0)
X86_FEATURE_COMPAT(AMX_INT8, "amx-int8", 0)
X86_FEATURE_COMPAT(AMX_BF16, "amx-bf16", 0)
X86_FEATURE_COMPAT(UINTR, "uintr", 0)
X86_FEATURE_COMPAT(HRESET, "hreset", 0)
X86_FEATURE_COMPAT(KL, "kl", 0)
X86_FEATURE (FXSR, "fxsr")
X86_FEATURE_COMPAT(WIDEKL, "widekl", 0)
X86_FEATURE_COMPAT(AVXVNNI, "avxvnni", 0)
X86_FEATURE_COMPAT(AVX512FP16, "avx512fp16", 0)
X86_FEATURE (CCMP, "ccmp")
X86_FEATURE (Push2Pop2, "push2pop2")
X86_FEATURE (PPX, "ppx")
X86_FEATURE (NDD, "ndd")
X86_FEATURE_COMPAT(AVXIFMA, "avxifma", 0)
X86_FEATURE_COMPAT(AVXVNNIINT8, "avxvnniint8", 0)
X86_FEATURE_COMPAT(AVXNECONVERT, "avxneconvert", 0)
X86_FEATURE_COMPAT(CMPCCXADD, "cmpccxadd", 0)
X86_FEATURE_COMPAT(AMX_FP16, "amx-fp16", 0)
X86_FEATURE_COMPAT(PREFETCHI, "prefetchi", 0)
X86_FEATURE_COMPAT(RAOINT, "raoint", 0)
X86_FEATURE_COMPAT(AMX_COMPLEX, "amx-complex", 0)
X86_FEATURE_COMPAT(AVXVNNIINT16, "avxvnniint16", 0)
X86_FEATURE_COMPAT(SM3, "sm3", 0)
X86_FEATURE_COMPAT(SHA512, "sha512", 0)
X86_FEATURE_COMPAT(SM4, "sm4", 0)
X86_FEATURE (EGPR, "egpr")
X86_FEATURE_COMPAT(USERMSR, "usermsr", 0)
X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 36)
X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37)
X86_FEATURE (ZU, "zu")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
X86_FEATURE (RETPOLINE_INDIRECT_CALLS, "retpoline-indirect-calls")
X86_FEATURE (LVI_CFI, "lvi-cfi")
X86_FEATURE (LVI_LOAD_HARDENING, "lvi-load-hardening")
X86_MICROARCH_LEVEL(X86_64_BASELINE,"x86-64", 95)
X86_MICROARCH_LEVEL(X86_64_V2, "x86-64-v2", 96)
X86_MICROARCH_LEVEL(X86_64_V3, "x86-64-v3", 97)
X86_MICROARCH_LEVEL(X86_64_V4, "x86-64-v4", 98)
X86_MICROARCH_LEVEL(APXF, "apxf", 111)
#undef X86_FEATURE_COMPAT
#undef X86_FEATURE
#undef X86_MICROARCH_LEVEL
diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/X86TargetParser.h b/contrib/llvm-project/llvm/include/llvm/TargetParser/X86TargetParser.h
index 2083e585af4a..5468aaa81edb 100644
--- a/contrib/llvm-project/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -1,186 +1,187 @@
//===-- X86TargetParser - Parser for X86 features ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a target parser to recognise X86 hardware features.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGETPARSER_X86TARGETPARSER_H
#define LLVM_TARGETPARSER_X86TARGETPARSER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include <array>
namespace llvm {
template <typename T> class SmallVectorImpl;
class StringRef;
namespace X86 {
// This should be kept in sync with libgcc/compiler-rt as it's included by clang
// as a proxy for what's in libgcc/compiler-rt.
enum ProcessorVendors : unsigned {
VENDOR_DUMMY,
#define X86_VENDOR(ENUM, STRING) \
ENUM,
#include "llvm/TargetParser/X86TargetParser.def"
VENDOR_OTHER
};
// This should be kept in sync with libgcc/compiler-rt as it's included by clang
// as a proxy for what's in libgcc/compiler-rt.
enum ProcessorTypes : unsigned {
CPU_TYPE_DUMMY,
#define X86_CPU_TYPE(ENUM, STRING) \
ENUM,
#include "llvm/TargetParser/X86TargetParser.def"
CPU_TYPE_MAX
};
// This should be kept in sync with libgcc/compiler-rt as it's included by clang
// as a proxy for what's in libgcc/compiler-rt.
enum ProcessorSubtypes : unsigned {
CPU_SUBTYPE_DUMMY,
#define X86_CPU_SUBTYPE(ENUM, STRING) \
ENUM,
#include "llvm/TargetParser/X86TargetParser.def"
CPU_SUBTYPE_MAX
};
// This should be kept in sync with libgcc/compiler-rt as it should be used
// by clang as a proxy for what's in libgcc/compiler-rt.
enum ProcessorFeatures {
#define X86_FEATURE(ENUM, STRING) FEATURE_##ENUM,
#include "llvm/TargetParser/X86TargetParser.def"
CPU_FEATURE_MAX,
#define X86_MICROARCH_LEVEL(ENUM, STRING, PRIORITY) FEATURE_##ENUM = PRIORITY,
#include "llvm/TargetParser/X86TargetParser.def"
};
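// Illustrative only: after expansion the compat features occupy sequential
// enumerators starting at FEATURE_CMOV == 0 (their bit positions in the
// __builtin_cpu_supports mask), while the microarch levels take the fixed
// PRIORITY values from the .def, e.g. FEATURE_X86_64_BASELINE == 95.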
enum CPUKind {
CK_None,
CK_i386,
CK_i486,
CK_WinChipC6,
CK_WinChip2,
CK_C3,
CK_i586,
CK_Pentium,
CK_PentiumMMX,
CK_PentiumPro,
CK_i686,
CK_Pentium2,
CK_Pentium3,
CK_PentiumM,
CK_C3_2,
CK_Yonah,
CK_Pentium4,
CK_Prescott,
CK_Nocona,
CK_Core2,
CK_Penryn,
CK_Bonnell,
CK_Silvermont,
CK_Goldmont,
CK_GoldmontPlus,
CK_Tremont,
CK_Gracemont,
CK_Nehalem,
CK_Westmere,
CK_SandyBridge,
CK_IvyBridge,
CK_Haswell,
CK_Broadwell,
CK_SkylakeClient,
CK_SkylakeServer,
CK_Cascadelake,
CK_Cooperlake,
CK_Cannonlake,
CK_IcelakeClient,
CK_Rocketlake,
CK_IcelakeServer,
CK_Tigerlake,
CK_SapphireRapids,
CK_Alderlake,
CK_Raptorlake,
CK_Meteorlake,
CK_Arrowlake,
CK_ArrowlakeS,
CK_Lunarlake,
CK_Pantherlake,
CK_Sierraforest,
CK_Grandridge,
CK_Graniterapids,
CK_GraniterapidsD,
CK_Emeraldrapids,
CK_Clearwaterforest,
CK_KNL,
CK_KNM,
CK_Lakemont,
CK_K6,
CK_K6_2,
CK_K6_3,
CK_Athlon,
CK_AthlonXP,
CK_K8,
CK_K8SSE3,
CK_AMDFAM10,
CK_BTVER1,
CK_BTVER2,
CK_BDVER1,
CK_BDVER2,
CK_BDVER3,
CK_BDVER4,
CK_ZNVER1,
CK_ZNVER2,
CK_ZNVER3,
CK_ZNVER4,
CK_x86_64,
CK_x86_64_v2,
CK_x86_64_v3,
CK_x86_64_v4,
CK_Geode,
+ CK_ZNVER5,
};
/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if
/// \p Only64Bit is true.
CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false);
CPUKind parseTuneCPU(StringRef CPU, bool Only64Bit = false);
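/// Illustrative only (not asserted by this header): after this change,
/// parseArchX86("znver5", /*Only64Bit=*/true) should map to CK_ZNVER5,
/// while a 32-bit-only name such as "pentium" yields CK_None whenever
/// Only64Bit is true.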
/// Provide a list of valid CPU names. If \p Only64Bit is true, the list will
/// only contain 64-bit capable CPUs.
void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit = false);
/// Provide a list of valid -mtune names.
void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit = false);
/// Get the key feature used to prioritize target multiversioning.
ProcessorFeatures getKeyFeature(CPUKind Kind);
/// Fill in the features that \p CPU supports into \p Features.
/// "+" will be append in front of each feature if NeedPlus is true.
void getFeaturesForCPU(StringRef CPU, SmallVectorImpl<StringRef> &Features,
bool NeedPlus = false);
/// Set or clear entries in \p Features that are implied to be enabled/disabled
/// by the provided \p Feature.
void updateImpliedFeatures(StringRef Feature, bool Enabled,
StringMap<bool> &Features);
char getCPUDispatchMangling(StringRef Name);
bool validateCPUSpecificCPUDispatch(StringRef Name);
std::array<uint32_t, 4> getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
unsigned getFeaturePriority(ProcessorFeatures Feat);
} // namespace X86
} // namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index 0f29ebe3ee79..b1a2bfaf7895 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -1,2849 +1,2852 @@
//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ModuloSchedule.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "pipeliner"
using namespace llvm;
static cl::opt<bool> SwapBranchTargetsMVE(
"pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
cl::desc("Swap target blocks of a conditional branch for MVE expander"));
void ModuloSchedule::print(raw_ostream &OS) {
for (MachineInstr *MI : ScheduledInstrs)
OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
}
//===----------------------------------------------------------------------===//
// ModuloScheduleExpander implementation
//===----------------------------------------------------------------------===//
/// Return the register values for the operands of a Phi instruction.
/// This function assumes the instruction is a Phi.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
unsigned &InitVal, unsigned &LoopVal) {
assert(Phi.isPHI() && "Expecting a Phi.");
InitVal = 0;
LoopVal = 0;
for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
if (Phi.getOperand(i + 1).getMBB() != Loop)
InitVal = Phi.getOperand(i).getReg();
else
LoopVal = Phi.getOperand(i).getReg();
assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
}
/// Return the Phi register value that comes from the incoming block.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
if (Phi.getOperand(i + 1).getMBB() != LoopBB)
return Phi.getOperand(i).getReg();
return 0;
}
/// Return the Phi register value that comes from the loop block.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
if (Phi.getOperand(i + 1).getMBB() == LoopBB)
return Phi.getOperand(i).getReg();
return 0;
}
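// Illustrative only: for a loop-header phi of the shape
//   %v = PHI %init, %preheader, %next, %loop
// getInitPhiReg returns %init (its incoming block is not the loop) and
// getLoopPhiReg returns %next (the loop-carried value).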
void ModuloScheduleExpander::expand() {
BB = Schedule.getLoop()->getTopBlock();
Preheader = *BB->pred_begin();
if (Preheader == BB)
Preheader = *std::next(BB->pred_begin());
// Iterate over the definitions in each instruction, and compute the
// stage difference for each use. Keep the maximum value.
for (MachineInstr *MI : Schedule.getInstructions()) {
int DefStage = Schedule.getStage(MI);
for (const MachineOperand &Op : MI->all_defs()) {
Register Reg = Op.getReg();
unsigned MaxDiff = 0;
bool PhiIsSwapped = false;
for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
MachineInstr *UseMI = UseOp.getParent();
int UseStage = Schedule.getStage(UseMI);
unsigned Diff = 0;
if (UseStage != -1 && UseStage >= DefStage)
Diff = UseStage - DefStage;
if (MI->isPHI()) {
if (isLoopCarried(*MI))
++Diff;
else
PhiIsSwapped = true;
}
MaxDiff = std::max(Diff, MaxDiff);
}
RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
}
}
generatePipelinedLoop();
}
void ModuloScheduleExpander::generatePipelinedLoop() {
LoopInfo = TII->analyzeLoopForPipelining(BB);
assert(LoopInfo && "Must be able to analyze loop!");
// Create a new basic block for the kernel and add it to the CFG.
MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
unsigned MaxStageCount = Schedule.getNumStages() - 1;
// Remember the registers that are used in different stages. The index is
// the iteration, or stage, that the instruction is scheduled in. This is
// a map between register names in the original block and the names created
// in each stage of the pipelined loop.
ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
// The renaming destination by Phis for the registers across stages.
// This map is updated during Phis generation to point to the most recent
// renaming destination.
ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
InstrMapTy InstrMap;
SmallVector<MachineBasicBlock *, 4> PrologBBs;
// Generate the prolog instructions that set up the pipeline.
generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
MF.insert(BB->getIterator(), KernelBB);
+ LIS.insertMBBInMaps(KernelBB);
// Rearrange the instructions to generate the new, pipelined loop,
// and update register names as needed.
for (MachineInstr *CI : Schedule.getInstructions()) {
if (CI->isPHI())
continue;
unsigned StageNum = Schedule.getStage(CI);
MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
KernelBB->push_back(NewMI);
InstrMap[NewMI] = CI;
}
// Copy any terminator instructions to the new kernel, and update
// names as needed.
for (MachineInstr &MI : BB->terminators()) {
MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
KernelBB->push_back(NewMI);
InstrMap[NewMI] = &MI;
}
NewKernel = KernelBB;
KernelBB->transferSuccessors(BB);
KernelBB->replaceSuccessor(BB, KernelBB);
generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
InstrMap, MaxStageCount, MaxStageCount, false);
generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
InstrMap, MaxStageCount, MaxStageCount, false);
LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
SmallVector<MachineBasicBlock *, 4> EpilogBBs;
// Generate the epilog instructions to complete the pipeline.
generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
PrologBBs);
// We need this step because the register allocation doesn't handle some
// situations well, so we insert copies to help out.
splitLifetimes(KernelBB, EpilogBBs);
// Remove dead instructions due to loop induction variables.
removeDeadInstructions(KernelBB, EpilogBBs);
// Add branches between prolog and epilog blocks.
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
delete[] VRMap;
delete[] VRMapPhi;
}
void ModuloScheduleExpander::cleanup() {
// Remove the original loop since it's no longer referenced.
for (auto &I : *BB)
LIS.RemoveMachineInstrFromMaps(I);
BB->clear();
BB->eraseFromParent();
}
/// Generate the pipeline prolog code.
void ModuloScheduleExpander::generateProlog(unsigned LastStage,
MachineBasicBlock *KernelBB,
ValueMapTy *VRMap,
MBBVectorTy &PrologBBs) {
MachineBasicBlock *PredBB = Preheader;
InstrMapTy InstrMap;
// Generate a basic block for each stage, not including the last stage,
// which will be generated in the kernel. Each basic block may contain
// instructions from multiple stages/iterations.
for (unsigned i = 0; i < LastStage; ++i) {
// Create and insert the prolog basic block prior to the original loop
// basic block. The original loop is removed later.
MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
PrologBBs.push_back(NewBB);
MF.insert(BB->getIterator(), NewBB);
NewBB->transferSuccessors(PredBB);
PredBB->addSuccessor(NewBB);
PredBB = NewBB;
+ LIS.insertMBBInMaps(NewBB);
// Generate instructions for each appropriate stage. Process instructions
// in original program order.
for (int StageNum = i; StageNum >= 0; --StageNum) {
for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
BBE = BB->getFirstTerminator();
BBI != BBE; ++BBI) {
if (Schedule.getStage(&*BBI) == StageNum) {
if (BBI->isPHI())
continue;
MachineInstr *NewMI =
cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
NewBB->push_back(NewMI);
InstrMap[NewMI] = &*BBI;
}
}
}
rewritePhiValues(NewBB, i, VRMap, InstrMap);
LLVM_DEBUG({
dbgs() << "prolog:\n";
NewBB->dump();
});
}
PredBB->replaceSuccessor(BB, KernelBB);
// Check if we need to remove the branch from the preheader to the original
// loop, and replace it with a branch to the new loop.
unsigned numBranches = TII->removeBranch(*Preheader);
if (numBranches) {
SmallVector<MachineOperand, 0> Cond;
TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
}
}
/// Generate the pipeline epilog code. The epilog code finishes the iterations
/// that were started in either the prolog or the kernel. We create a basic
/// block for each stage that needs to complete.
void ModuloScheduleExpander::generateEpilog(
unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
MBBVectorTy &PrologBBs) {
// We need to change the branch from the kernel to the first epilog block, so
// this call to analyze branch uses the kernel rather than the original BB.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
assert(!checkBranch && "generateEpilog must be able to analyze the branch");
if (checkBranch)
return;
MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
if (*LoopExitI == KernelBB)
++LoopExitI;
assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
MachineBasicBlock *LoopExitBB = *LoopExitI;
MachineBasicBlock *PredBB = KernelBB;
MachineBasicBlock *EpilogStart = LoopExitBB;
InstrMapTy InstrMap;
// Generate a basic block for each stage, not including the last stage,
// which was generated for the kernel. Each basic block may contain
// instructions from multiple stages/iterations.
int EpilogStage = LastStage + 1;
for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
EpilogBBs.push_back(NewBB);
MF.insert(BB->getIterator(), NewBB);
PredBB->replaceSuccessor(LoopExitBB, NewBB);
NewBB->addSuccessor(LoopExitBB);
+ LIS.insertMBBInMaps(NewBB);
if (EpilogStart == LoopExitBB)
EpilogStart = NewBB;
// Add instructions to the epilog depending on the current block.
// Process instructions in original program order.
for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
for (auto &BBI : *BB) {
if (BBI.isPHI())
continue;
MachineInstr *In = &BBI;
if ((unsigned)Schedule.getStage(In) == StageNum) {
// Instructions with memoperands in the epilog are updated with
// conservative values.
MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
NewBB->push_back(NewMI);
InstrMap[NewMI] = In;
}
}
}
generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
InstrMap, LastStage, EpilogStage, i == 1);
generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
InstrMap, LastStage, EpilogStage, i == 1);
PredBB = NewBB;
LLVM_DEBUG({
dbgs() << "epilog:\n";
NewBB->dump();
});
}
// Fix any Phi nodes in the loop exit block.
LoopExitBB->replacePhiUsesWith(BB, PredBB);
// Create a branch to the new epilog from the kernel.
// Remove the original branch and add a new branch to the epilog.
TII->removeBranch(*KernelBB);
assert((OrigBB == TBB || OrigBB == FBB) &&
"Unable to determine looping branch direction");
if (OrigBB != TBB)
TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
else
TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
// Add a branch to the loop exit.
if (EpilogBBs.size() > 0) {
MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
SmallVector<MachineOperand, 4> Cond1;
TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
}
}
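// Illustrative only: for a 3-stage schedule the expanded CFG is
// Preheader -> Prolog0 -> Prolog1 -> Kernel (self-loop) -> Epilog0 ->
// Epilog1 -> LoopExit, with the epilogs draining the iterations left
// in flight by the prologs and kernel.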
/// Replace all uses of FromReg that appear outside the specified
/// basic block with ToReg.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
MachineBasicBlock *MBB,
MachineRegisterInfo &MRI,
LiveIntervals &LIS) {
for (MachineOperand &O :
llvm::make_early_inc_range(MRI.use_operands(FromReg)))
if (O.getParent()->getParent() != MBB)
O.setReg(ToReg);
if (!LIS.hasInterval(ToReg))
LIS.createEmptyInterval(ToReg);
}
/// Return true if the register has a use that occurs outside the
/// specified loop.
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
MachineRegisterInfo &MRI) {
for (const MachineOperand &MO : MRI.use_operands(Reg))
if (MO.getParent()->getParent() != BB)
return true;
return false;
}
/// Generate Phis for the specific block in the generated pipelined code.
/// This function looks at the Phis from the original code to guide the
/// creation of new Phis.
void ModuloScheduleExpander::generateExistingPhis(
MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
// Compute the stage number for the initial value of the Phi, which
// comes from the prolog. The prolog to use depends on which kernel or
// epilog block the Phi is being added to.
unsigned PrologStage = 0;
unsigned PrevStage = 0;
bool InKernel = (LastStageNum == CurStageNum);
if (InKernel) {
PrologStage = LastStageNum - 1;
PrevStage = CurStageNum;
} else {
PrologStage = LastStageNum - (CurStageNum - LastStageNum);
PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
}
for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
BBE = BB->getFirstNonPHI();
BBI != BBE; ++BBI) {
Register Def = BBI->getOperand(0).getReg();
unsigned InitVal = 0;
unsigned LoopVal = 0;
getPhiRegs(*BBI, BB, InitVal, LoopVal);
unsigned PhiOp1 = 0;
// The Phi value from the loop body typically is defined in the loop, but
// not always. So, we need to check if the value is defined in the loop.
unsigned PhiOp2 = LoopVal;
if (VRMap[LastStageNum].count(LoopVal))
PhiOp2 = VRMap[LastStageNum][LoopVal];
int StageScheduled = Schedule.getStage(&*BBI);
int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
unsigned NumStages = getStagesForReg(Def, CurStageNum);
if (NumStages == 0) {
// We don't need to generate a Phi anymore, but we need to rename any uses
// of the Phi value.
unsigned NewReg = VRMap[PrevStage][LoopVal];
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
InitVal, NewReg);
if (VRMap[CurStageNum].count(LoopVal))
VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
}
// Adjust the number of Phis needed depending on the number of prologs left,
// and the distance from where the Phi is first scheduled. The number of
// Phis cannot exceed the number of prolog stages. Each stage can
// potentially define two values.
unsigned MaxPhis = PrologStage + 2;
if (!InKernel && (int)PrologStage <= LoopValStage)
MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
unsigned NumPhis = std::min(NumStages, MaxPhis);
unsigned NewReg = 0;
unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
// In the epilog, we may need to look back one stage to get the correct
// Phi name, because the epilog and prolog blocks execute the same stage.
// The correct name is from the previous block only when the Phi has
// been completely scheduled prior to the epilog, and the Phi value is not
// needed in multiple stages.
int StageDiff = 0;
if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
NumPhis == 1)
StageDiff = 1;
// Adjust the computations below when the phi and the loop definition
// are scheduled in different stages.
if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
StageDiff = StageScheduled - LoopValStage;
for (unsigned np = 0; np < NumPhis; ++np) {
// If the Phi hasn't been scheduled, then use the initial Phi operand
// value. Otherwise, use the scheduled version of the instruction. This
// is a little complicated when a Phi references another Phi.
if (np > PrologStage || StageScheduled >= (int)LastStageNum)
PhiOp1 = InitVal;
// Check if the Phi has already been scheduled in a prolog stage.
else if (PrologStage >= AccessStage + StageDiff + np &&
VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
// Check if the Phi has already been scheduled, but the loop instruction
// is either another Phi, or doesn't occur in the loop.
else if (PrologStage >= AccessStage + StageDiff + np) {
// If the Phi references another Phi, we need to examine the other
// Phi to get the correct value.
PhiOp1 = LoopVal;
MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
int Indirects = 1;
while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
int PhiStage = Schedule.getStage(InstOp1);
if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
PhiOp1 = getInitPhiReg(*InstOp1, BB);
else
PhiOp1 = getLoopPhiReg(*InstOp1, BB);
InstOp1 = MRI.getVRegDef(PhiOp1);
int PhiOpStage = Schedule.getStage(InstOp1);
int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
break;
}
++Indirects;
}
} else
PhiOp1 = InitVal;
// If this references a generated Phi in the kernel, get the Phi operand
// from the incoming block.
if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
// In the epilog, a map lookup is needed to get the value from the kernel,
// or previous epilog block. How this is done depends on whether the
// instruction is scheduled in the previous block.
if (!InKernel) {
int StageDiffAdj = 0;
if (LoopValStage != -1 && StageScheduled > LoopValStage)
StageDiffAdj = StageScheduled - LoopValStage;
// Use the loop value defined in the kernel, unless the kernel
// contains the last definition of the Phi.
if (np == 0 && PrevStage == LastStageNum &&
(StageScheduled != 0 || LoopValStage != 0) &&
VRMap[PrevStage - StageDiffAdj].count(LoopVal))
PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
// Use the value defined by the Phi. We add one because we switch
// from looking at the loop value to the Phi definition.
else if (np > 0 && PrevStage == LastStageNum &&
VRMap[PrevStage - np + 1].count(Def))
PhiOp2 = VRMap[PrevStage - np + 1][Def];
// Use the loop value defined in the kernel.
else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
// Use the value defined by the Phi, unless we're generating the first
// epilog and the Phi refers to a Phi in a different stage.
else if (VRMap[PrevStage - np].count(Def) &&
(!LoopDefIsPhi || (PrevStage != LastStageNum) ||
(LoopValStage == StageScheduled)))
PhiOp2 = VRMap[PrevStage - np][Def];
}
// Check if we can reuse an existing Phi. This occurs when a Phi
// references another Phi, and the other Phi is scheduled in an
// earlier stage. We can try to reuse an existing Phi up until the last
// stage of the current Phi.
if (LoopDefIsPhi) {
if (static_cast<int>(PrologStage - np) >= StageScheduled) {
int LVNumStages = getStagesForPhi(LoopVal);
int StageDiff = (StageScheduled - LoopValStage);
LVNumStages -= StageDiff;
// Make sure the loop value Phi has been processed already.
if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
NewReg = PhiOp2;
unsigned ReuseStage = CurStageNum;
if (isLoopCarried(*PhiInst))
ReuseStage -= LVNumStages;
// Check if the Phi to reuse has been generated yet. If not, then
// there is nothing to reuse.
if (VRMap[ReuseStage - np].count(LoopVal)) {
NewReg = VRMap[ReuseStage - np][LoopVal];
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
Def, NewReg);
// Update the map with the new Phi name.
VRMap[CurStageNum - np][Def] = NewReg;
PhiOp2 = NewReg;
if (VRMap[LastStageNum - np - 1].count(LoopVal))
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
continue;
}
}
}
if (InKernel && StageDiff > 0 &&
VRMap[CurStageNum - StageDiff - np].count(LoopVal))
PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
}
const TargetRegisterClass *RC = MRI.getRegClass(Def);
NewReg = MRI.createVirtualRegister(RC);
MachineInstrBuilder NewPhi =
BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
if (np == 0)
InstrMap[NewPhi] = &*BBI;
// We define the Phis after creating the new pipelined code, so
// we need to rename the Phi values in scheduled instructions.
unsigned PrevReg = 0;
if (InKernel && VRMap[PrevStage - np].count(LoopVal))
PrevReg = VRMap[PrevStage - np][LoopVal];
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
NewReg, PrevReg);
// If the Phi has been scheduled, use the new name for rewriting.
if (VRMap[CurStageNum - np].count(Def)) {
unsigned R = VRMap[CurStageNum - np][Def];
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
NewReg);
}
// Check if we need to rename any uses that occurs after the loop. The
// register to replace depends on whether the Phi is scheduled in the
// epilog.
if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
// In the kernel, a dependent Phi uses the value from this Phi.
if (InKernel)
PhiOp2 = NewReg;
// Update the map with the new Phi name.
VRMap[CurStageNum - np][Def] = NewReg;
}
while (NumPhis++ < NumStages) {
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
NewReg, 0);
}
// Check if we need to rename a Phi that has been eliminated due to
// scheduling.
if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
}
}
/// Generate Phis for the specified block in the generated pipelined code.
/// These are new Phis needed because the definition is scheduled after the
/// use in the pipelined sequence.
void ModuloScheduleExpander::generatePhis(
MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
bool IsLast) {
// Compute the stage number that contains the initial Phi value, and
// the Phi from the previous stage.
unsigned PrologStage = 0;
unsigned PrevStage = 0;
unsigned StageDiff = CurStageNum - LastStageNum;
bool InKernel = (StageDiff == 0);
if (InKernel) {
PrologStage = LastStageNum - 1;
PrevStage = CurStageNum;
} else {
PrologStage = LastStageNum - StageDiff;
PrevStage = LastStageNum + StageDiff - 1;
}
for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
BBE = BB->instr_end();
BBI != BBE; ++BBI) {
for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = BBI->getOperand(i);
if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
continue;
int StageScheduled = Schedule.getStage(&*BBI);
assert(StageScheduled != -1 && "Expecting scheduled instruction.");
Register Def = MO.getReg();
unsigned NumPhis = getStagesForReg(Def, CurStageNum);
// An instruction that is scheduled in stage 0 and used after the loop
// requires a phi in the epilog for the last definition from either
// the kernel or prolog.
if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
hasUseAfterLoop(Def, BB, MRI))
NumPhis = 1;
if (!InKernel && (unsigned)StageScheduled > PrologStage)
continue;
unsigned PhiOp2;
if (InKernel) {
PhiOp2 = VRMap[PrevStage][Def];
if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
}
// The number of Phis can't exceed the number of prolog stages. The
// prolog stage number is zero based.
if (NumPhis > PrologStage + 1 - StageScheduled)
NumPhis = PrologStage + 1 - StageScheduled;
for (unsigned np = 0; np < NumPhis; ++np) {
// Example for
// Org:
// %Org = ... (Scheduled at Stage#0, NumPhi = 2)
//
// Prolog0 (Stage0):
// %Clone0 = ...
// Prolog1 (Stage1):
// %Clone1 = ...
// Kernel (Stage2):
// %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
// %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
// %Clone2 = ...
// Epilog0 (Stage3):
// %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
// %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
// Epilog1 (Stage4):
// %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
//
// VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
// VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
// VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
unsigned PhiOp1 = VRMap[PrologStage][Def];
if (np <= PrologStage)
PhiOp1 = VRMap[PrologStage - np][Def];
if (!InKernel) {
if (PrevStage == LastStageNum && np == 0)
PhiOp2 = VRMap[LastStageNum][Def];
else
PhiOp2 = VRMapPhi[PrevStage - np][Def];
}
const TargetRegisterClass *RC = MRI.getRegClass(Def);
Register NewReg = MRI.createVirtualRegister(RC);
MachineInstrBuilder NewPhi =
BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
if (np == 0)
InstrMap[NewPhi] = &*BBI;
// Rewrite uses and update the map. The actions depend upon whether
// we are generating code for the kernel or epilog blocks.
if (InKernel) {
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
NewReg);
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
NewReg);
PhiOp2 = NewReg;
VRMapPhi[PrevStage - np - 1][Def] = NewReg;
} else {
VRMapPhi[CurStageNum - np][Def] = NewReg;
if (np == NumPhis - 1)
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
NewReg);
}
if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
}
}
}
}
/// Remove instructions that generate values with no uses.
/// Typically, these are induction variable operations that generate values
/// used in the loop itself. A dead instruction has a definition with
/// no uses, or uses that occur in the original loop only.
void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
MBBVectorTy &EpilogBBs) {
// For each epilog block, check that the value defined by each instruction
// is used. If not, delete it.
for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
ME = MBB->instr_rend();
MI != ME;) {
// From DeadMachineInstructionElim. Don't delete inline assembly.
if (MI->isInlineAsm()) {
++MI;
continue;
}
bool SawStore = false;
// Check if it's safe to remove the instruction due to side effects.
// We can, and want to, remove Phis here.
if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
++MI;
continue;
}
bool used = true;
for (const MachineOperand &MO : MI->all_defs()) {
Register reg = MO.getReg();
// Assume physical registers are used, unless they are marked dead.
if (reg.isPhysical()) {
used = !MO.isDead();
if (used)
break;
continue;
}
unsigned realUses = 0;
for (const MachineOperand &U : MRI.use_operands(reg)) {
// Uses that occur only in the original loop are not real uses; only a
// use in some other block keeps the instruction alive.
if (U.getParent()->getParent() != BB) {
realUses++;
used = true;
break;
}
}
if (realUses > 0)
break;
used = false;
}
if (!used) {
LIS.RemoveMachineInstrFromMaps(*MI);
MI++->eraseFromParent();
continue;
}
++MI;
}
// In the kernel block, check if we can remove a Phi that generates a value
// used in an instruction removed in the epilog block.
for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
Register reg = MI.getOperand(0).getReg();
if (MRI.use_begin(reg) == MRI.use_end()) {
LIS.RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
}
}
}
/// For loop carried definitions, we split the lifetime of a virtual register
/// that has uses past the definition in the next iteration. A copy with a new
/// virtual register is inserted before the definition, which helps with
/// generating a better register assignment.
///
///   v1 = phi(a, v2)     v1 = phi(a, v2)
///   v2 = phi(b, v3)     v2 = phi(b, v3)
///   v3 = ..             v4 = copy v1
///   .. = v1             v3 = ..
///                       .. = v4
void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
MBBVectorTy &EpilogBBs) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (auto &PHI : KernelBB->phis()) {
Register Def = PHI.getOperand(0).getReg();
// Check for any Phi definition that is used as an operand of another Phi
// in the same block.
for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
E = MRI.use_instr_end();
I != E; ++I) {
if (I->isPHI() && I->getParent() == KernelBB) {
// Get the loop carried definition.
unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
if (!LCDef)
continue;
MachineInstr *MI = MRI.getVRegDef(LCDef);
if (!MI || MI->getParent() != KernelBB || MI->isPHI())
continue;
// Search through the rest of the block looking for uses of the Phi
// definition. If one occurs, then split the lifetime.
unsigned SplitReg = 0;
for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
KernelBB->instr_end()))
if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
// We split the lifetime when we find the first use.
if (SplitReg == 0) {
SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::COPY), SplitReg)
.addReg(Def);
}
BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
}
if (!SplitReg)
continue;
// Search through each of the epilog blocks for any uses to be renamed.
for (auto &Epilog : EpilogBBs)
for (auto &I : *Epilog)
if (I.readsRegister(Def, /*TRI=*/nullptr))
I.substituteRegister(Def, SplitReg, 0, *TRI);
break;
}
}
}
}
/// Remove the incoming block from the Phis in a basic block.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
for (MachineInstr &MI : *BB) {
if (!MI.isPHI())
break;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
if (MI.getOperand(i + 1).getMBB() == Incoming) {
MI.removeOperand(i + 1);
MI.removeOperand(i);
break;
}
}
}
/// Create branches from each prolog basic block to the appropriate epilog
/// block. These edges are needed if the loop ends before reaching the
/// kernel.
void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
MBBVectorTy &PrologBBs,
MachineBasicBlock *KernelBB,
MBBVectorTy &EpilogBBs,
ValueMapTy *VRMap) {
assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
MachineBasicBlock *LastPro = KernelBB;
MachineBasicBlock *LastEpi = KernelBB;
// Start from the blocks connected to the kernel and work "out"
// to the first prolog and the last epilog blocks.
SmallVector<MachineInstr *, 4> PrevInsts;
unsigned MaxIter = PrologBBs.size() - 1;
for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
// Add branches to the prolog that go to the corresponding
// epilog, and the fall-thru prolog/kernel block.
MachineBasicBlock *Prolog = PrologBBs[j];
MachineBasicBlock *Epilog = EpilogBBs[i];
SmallVector<MachineOperand, 4> Cond;
std::optional<bool> StaticallyGreater =
LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
unsigned numAdded = 0;
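// Three cases follow: if the trip-count comparison is unknown at compile
// time, emit a conditional branch from the prolog to its epilog; if it is
// statically false, the kernel is never reached from here, so branch
// straight to the epilog and delete the dead interior blocks; if it is
// statically true, the prolog always falls through, so only the epilog's
// phi operands coming from this prolog need to be removed.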
if (!StaticallyGreater) {
Prolog->addSuccessor(Epilog);
numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
} else if (*StaticallyGreater == false) {
Prolog->addSuccessor(Epilog);
Prolog->removeSuccessor(LastPro);
LastEpi->removeSuccessor(Epilog);
numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
removePhis(Epilog, LastEpi);
// Remove the blocks that are no longer referenced.
if (LastPro != LastEpi) {
LastEpi->clear();
LastEpi->eraseFromParent();
}
if (LastPro == KernelBB) {
LoopInfo->disposed();
NewKernel = nullptr;
}
LastPro->clear();
LastPro->eraseFromParent();
} else {
numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
removePhis(Epilog, Prolog);
}
LastPro = Prolog;
LastEpi = Epilog;
for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
E = Prolog->instr_rend();
I != E && numAdded > 0; ++I, --numAdded)
updateInstruction(&*I, false, j, 0, VRMap);
}
if (NewKernel) {
LoopInfo->setPreheader(PrologBBs[MaxIter]);
LoopInfo->adjustTripCount(-(MaxIter + 1));
}
}
/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set Delta to the amount of the change.
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineOperand *BaseOp;
int64_t Offset;
bool OffsetIsScalable;
if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
return false;
// FIXME: This algorithm assumes instructions have fixed-size offsets.
if (OffsetIsScalable)
return false;
if (!BaseOp->isReg())
return false;
Register BaseReg = BaseOp->getReg();
MachineRegisterInfo &MRI = MF.getRegInfo();
// Check if there is a Phi. If so, get the definition in the loop.
MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
if (BaseDef && BaseDef->isPHI()) {
BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
BaseDef = MRI.getVRegDef(BaseReg);
}
if (!BaseDef)
return false;
int D = 0;
if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
return false;
Delta = D;
return true;
}
/// Update the memory operand with a new offset when the pipeliner
/// generates a new copy of the instruction that refers to a
/// different memory location.
void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
MachineInstr &OldMI,
unsigned Num) {
if (Num == 0)
return;
// If the instruction has memory operands, then adjust the offset
// when the instruction appears in different stages.
if (NewMI.memoperands_empty())
return;
SmallVector<MachineMemOperand *, 2> NewMMOs;
for (MachineMemOperand *MMO : NewMI.memoperands()) {
// TODO: Figure out whether isAtomic is really necessary (see D57601).
if (MMO->isVolatile() || MMO->isAtomic() ||
(MMO->isInvariant() && MMO->isDereferenceable()) ||
(!MMO->getValue())) {
NewMMOs.push_back(MMO);
continue;
}
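// Scale the per-iteration delta by the stage distance to compute the new
// offset. If the delta cannot be computed, conservatively widen the
// memory operand so it no longer asserts a precise offset or size.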
unsigned Delta;
if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
int64_t AdjOffset = Delta * Num;
NewMMOs.push_back(
MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
} else {
NewMMOs.push_back(MF.getMachineMemOperand(
MMO, 0, LocationSize::beforeOrAfterPointer()));
}
}
NewMI.setMemRefs(MF, NewMMOs);
}
/// Clone the instruction for the new pipelined loop and update the
/// memory operands, if needed.
MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
unsigned CurStageNum,
unsigned InstStageNum) {
MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
return NewMI;
}
/// Clone the instruction for the new pipelined loop. If needed, this
/// function updates the instruction using the values saved in the
/// InstrChanges structure.
MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
auto It = InstrChanges.find(OldMI);
if (It != InstrChanges.end()) {
std::pair<unsigned, int64_t> RegAndOffset = It->second;
unsigned BasePos, OffsetPos;
if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
return nullptr;
int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
NewMI->getOperand(OffsetPos).setImm(NewOffset);
}
updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
return NewMI;
}
/// Update the machine instruction with new virtual registers. This
/// function may change the definitions and/or uses.
void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
bool LastDef,
unsigned CurStageNum,
unsigned InstrStageNum,
ValueMapTy *VRMap) {
for (MachineOperand &MO : NewMI->operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
Register reg = MO.getReg();
if (MO.isDef()) {
// Create a new virtual register for the definition.
const TargetRegisterClass *RC = MRI.getRegClass(reg);
Register NewReg = MRI.createVirtualRegister(RC);
MO.setReg(NewReg);
VRMap[CurStageNum][reg] = NewReg;
if (LastDef)
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
} else if (MO.isUse()) {
MachineInstr *Def = MRI.getVRegDef(reg);
// Compute the stage that contains the last definition for the instruction.
int DefStageNum = Schedule.getStage(Def);
unsigned StageNum = CurStageNum;
if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
// Compute the difference in stages between the definition and the use.
unsigned StageDiff = (InstrStageNum - DefStageNum);
// Make an adjustment to get the last definition.
StageNum -= StageDiff;
}
if (VRMap[StageNum].count(reg))
MO.setReg(VRMap[StageNum][reg]);
}
}
}
/// Return the instruction in the loop that defines the register.
/// If the definition is a Phi, then follow the Phi operand to
/// the instruction in the loop.
MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
SmallPtrSet<MachineInstr *, 8> Visited;
MachineInstr *Def = MRI.getVRegDef(Reg);
while (Def->isPHI()) {
if (!Visited.insert(Def).second)
break;
for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
if (Def->getOperand(i + 1).getMBB() == BB) {
Def = MRI.getVRegDef(Def->getOperand(i).getReg());
break;
}
}
return Def;
}
/// Return the new name for the value from the previous stage.
unsigned ModuloScheduleExpander::getPrevMapVal(
unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
ValueMapTy *VRMap, MachineBasicBlock *BB) {
unsigned PrevVal = 0;
if (StageNum > PhiStage) {
MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
// The name is defined in the previous stage.
PrevVal = VRMap[StageNum - 1][LoopVal];
else if (VRMap[StageNum].count(LoopVal))
// The previous name is defined in the current stage when the instruction
// order is swapped.
PrevVal = VRMap[StageNum][LoopVal];
else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
// The loop value hasn't yet been scheduled.
PrevVal = LoopVal;
else if (StageNum == PhiStage + 1)
// The loop value is another phi, which has not been scheduled.
PrevVal = getInitPhiReg(*LoopInst, BB);
else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
// The loop value is another phi, which has been scheduled.
PrevVal =
getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
LoopStage, VRMap, BB);
}
return PrevVal;
}
/// Rewrite the Phi values in the specified block to use the mappings
/// from the initial operand. Once the Phi is scheduled, we switch
/// to using the loop value instead of the Phi value, so those names
/// do not need to be rewritten.
void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
unsigned StageNum,
ValueMapTy *VRMap,
InstrMapTy &InstrMap) {
for (auto &PHI : BB->phis()) {
unsigned InitVal = 0;
unsigned LoopVal = 0;
getPhiRegs(PHI, BB, InitVal, LoopVal);
Register PhiDef = PHI.getOperand(0).getReg();
unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
unsigned NumPhis = getStagesForPhi(PhiDef);
if (NumPhis > StageNum)
NumPhis = StageNum;
for (unsigned np = 0; np <= NumPhis; ++np) {
unsigned NewVal =
getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
if (!NewVal)
NewVal = InitVal;
rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
NewVal);
}
}
}
/// Rewrite a previously scheduled instruction to use the register value
/// from the new instruction. Make sure the instruction occurs in the
/// basic block, and we don't change the uses in the new instruction.
void ModuloScheduleExpander::rewriteScheduledInstr(
MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
unsigned PrevReg) {
bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
int StagePhi = Schedule.getStage(Phi) + PhiNum;
// Rewrite uses that have been scheduled already to use the new
// Phi register.
for (MachineOperand &UseOp :
llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
MachineInstr *UseMI = UseOp.getParent();
if (UseMI->getParent() != BB)
continue;
if (UseMI->isPHI()) {
if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
continue;
if (getLoopPhiReg(*UseMI, BB) != OldReg)
continue;
}
InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
MachineInstr *OrigMI = OrigInstr->second;
int StageSched = Schedule.getStage(OrigMI);
int CycleSched = Schedule.getCycle(OrigMI);
unsigned ReplaceReg = 0;
// This is the stage for the scheduled instruction.
if (StagePhi == StageSched && Phi->isPHI()) {
int CyclePhi = Schedule.getCycle(Phi);
if (PrevReg && InProlog)
ReplaceReg = PrevReg;
else if (PrevReg && !isLoopCarried(*Phi) &&
(CyclePhi <= CycleSched || OrigMI->isPHI()))
ReplaceReg = PrevReg;
else
ReplaceReg = NewReg;
}
// The scheduled instruction occurs before the scheduled Phi, and the
// Phi is not loop carried.
if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
ReplaceReg = NewReg;
if (StagePhi > StageSched && Phi->isPHI())
ReplaceReg = NewReg;
if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
ReplaceReg = NewReg;
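// Perform the replacement. If the replacement register's class cannot
// be constrained to the old register's class, insert a COPY in a
// compatible class instead of substituting the register directly.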
if (ReplaceReg) {
const TargetRegisterClass *NRC =
MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
if (NRC)
UseOp.setReg(ReplaceReg);
else {
Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
SplitReg)
.addReg(ReplaceReg);
UseOp.setReg(SplitReg);
}
}
}
}
bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
if (!Phi.isPHI())
return false;
int DefCycle = Schedule.getCycle(&Phi);
int DefStage = Schedule.getStage(&Phi);
unsigned InitVal = 0;
unsigned LoopVal = 0;
getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
MachineInstr *Use = MRI.getVRegDef(LoopVal);
if (!Use || Use->isPHI())
return true;
int LoopCycle = Schedule.getCycle(Use);
int LoopStage = Schedule.getStage(Use);
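// The phi is loop-carried when the loop-value definition executes in a
// later cycle than the phi, or is scheduled in the same or an earlier
// stage.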
return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
}
//===----------------------------------------------------------------------===//
// PeelingModuloScheduleExpander implementation
//===----------------------------------------------------------------------===//
// This is a reimplementation of ModuloScheduleExpander that works by creating
// a fully correct steady-state kernel and peeling off the prolog and epilogs.
//===----------------------------------------------------------------------===//
namespace {
// Remove any dead phis in MBB. Dead phis either have only one block as input
// (in which case they are the identity) or have no uses.
void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
bool Changed = true;
while (Changed) {
Changed = false;
for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
assert(MI.isPHI());
if (MRI.use_empty(MI.getOperand(0).getReg())) {
if (LIS)
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
Changed = true;
} else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
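// A phi with a single incoming value (def plus one reg/block pair) is
// the identity; forward the incoming register to all users and delete
// the phi.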
const TargetRegisterClass *ConstrainRegClass =
MRI.constrainRegClass(MI.getOperand(1).getReg(),
MRI.getRegClass(MI.getOperand(0).getReg()));
assert(ConstrainRegClass &&
"Expected a valid constrained register class!");
(void)ConstrainRegClass;
MRI.replaceRegWith(MI.getOperand(0).getReg(),
MI.getOperand(1).getReg());
if (LIS)
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
Changed = true;
}
}
}
}
/// Rewrites the kernel block in-place to adhere to the given schedule.
/// KernelRewriter holds all of the state required to perform the rewriting.
class KernelRewriter {
ModuloSchedule &S;
MachineBasicBlock *BB;
MachineBasicBlock *PreheaderBB, *ExitBB;
MachineRegisterInfo &MRI;
const TargetInstrInfo *TII;
LiveIntervals *LIS;
// Map from register class to canonical undef register for that class.
DenseMap<const TargetRegisterClass *, Register> Undefs;
// Map from <LoopReg, InitReg> to phi register for all created phis. Note that
// this map is only used when InitReg is non-undef.
DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
// Map from LoopReg to phi register where the InitReg is undef.
DenseMap<Register, Register> UndefPhis;
// Reg is used by MI. Return the new register MI should use to adhere to the
// schedule. Insert phis as necessary.
Register remapUse(Register Reg, MachineInstr &MI);
// Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
// If InitReg is not given it is chosen arbitrarily. It will either be undef
// or will be chosen so as to share another phi.
Register phi(Register LoopReg, std::optional<Register> InitReg = {},
const TargetRegisterClass *RC = nullptr);
// Create an undef register of the given register class.
Register undef(const TargetRegisterClass *RC);
public:
KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
LiveIntervals *LIS = nullptr);
void rewrite();
};
} // namespace
KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
MachineBasicBlock *LoopBB, LiveIntervals *LIS)
: S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
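// Re-derive the preheader from the predecessor list, skipping the loop's
// own back-edge; the preheader returned by MachineLoop may be null when
// the incoming block is not a canonical preheader.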
PreheaderBB = *BB->pred_begin();
if (PreheaderBB == BB)
PreheaderBB = *std::next(BB->pred_begin());
}
void KernelRewriter::rewrite() {
// Rearrange the loop to be in schedule order. Note that the schedule may
// contain instructions that are not owned by the loop block (InstrChanges and
// friends), so we gracefully handle unowned instructions and delete any
// instructions that weren't in the schedule.
auto InsertPt = BB->getFirstTerminator();
MachineInstr *FirstMI = nullptr;
for (MachineInstr *MI : S.getInstructions()) {
if (MI->isPHI())
continue;
if (MI->getParent())
MI->removeFromParent();
BB->insert(InsertPt, MI);
if (!FirstMI)
FirstMI = MI;
}
assert(FirstMI && "Failed to find first MI in schedule");
// At this point all of the scheduled instructions are between FirstMI
// and the end of the block. Kill from the first non-phi to FirstMI.
for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
if (LIS)
LIS->RemoveMachineInstrFromMaps(*I);
(I++)->eraseFromParent();
}
// Now remap every instruction in the loop.
for (MachineInstr &MI : *BB) {
if (MI.isPHI() || MI.isTerminator())
continue;
for (MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
continue;
Register Reg = remapUse(MO.getReg(), MI);
MO.setReg(Reg);
}
}
EliminateDeadPhis(BB, MRI, LIS);
// Ensure a phi exists for all instructions that are either referenced by
// an illegal phi or by an instruction outside the loop. This allows us to
// treat remaps of these values the same as "normal" values that come from
// loop-carried phis.
for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
if (MI->isPHI()) {
Register R = MI->getOperand(0).getReg();
phi(R);
continue;
}
for (MachineOperand &Def : MI->defs()) {
for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
if (MI.getParent() != BB) {
phi(Def.getReg());
break;
}
}
}
}
}
Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
if (!Producer)
return Reg;
int ConsumerStage = S.getStage(&MI);
if (!Producer->isPHI()) {
// Non-phi producers are simple to remap. Insert as many phis as the
// difference between the consumer and producer stages.
if (Producer->getParent() != BB)
// Producer was not inside the loop. Use the register as-is.
return Reg;
int ProducerStage = S.getStage(Producer);
assert(ConsumerStage != -1 &&
"In-loop consumer should always be scheduled!");
assert(ConsumerStage >= ProducerStage);
unsigned StageDiff = ConsumerStage - ProducerStage;
for (unsigned I = 0; I < StageDiff; ++I)
Reg = phi(Reg);
return Reg;
}
// First, dive through the phi chain to find the defaults for the generated
// phis.
SmallVector<std::optional<Register>, 4> Defaults;
Register LoopReg = Reg;
auto LoopProducer = Producer;
while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
LoopReg = getLoopPhiReg(*LoopProducer, BB);
Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
LoopProducer = MRI.getUniqueVRegDef(LoopReg);
assert(LoopProducer);
}
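// Defaults now holds the init value of each phi in the chain, with the
// phi nearest the use first; LoopProducer is the first definition feeding
// the chain that is not a phi in this block.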
int LoopProducerStage = S.getStage(LoopProducer);
std::optional<Register> IllegalPhiDefault;
if (LoopProducerStage == -1) {
// Do nothing.
} else if (LoopProducerStage > ConsumerStage) {
// This schedule is only representable if ProducerStage == ConsumerStage+1.
// In addition, Consumer's cycle must be scheduled after Producer in the
// rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
// functions.
#ifndef NDEBUG // Silence unused variables in non-asserts mode.
int LoopProducerCycle = S.getCycle(LoopProducer);
int ConsumerCycle = S.getCycle(&MI);
#endif
assert(LoopProducerCycle <= ConsumerCycle);
assert(LoopProducerStage == ConsumerStage + 1);
// Peel off the first phi from Defaults and insert a phi between producer
// and consumer. This phi will not be at the front of the block so we
// consider it illegal. It will only exist during the rewrite process; it
// needs to exist while we peel off prologs because these could take the
// default value. After that we can replace all uses with the loop producer
// value.
IllegalPhiDefault = Defaults.front();
Defaults.erase(Defaults.begin());
} else {
assert(ConsumerStage >= LoopProducerStage);
int StageDiff = ConsumerStage - LoopProducerStage;
if (StageDiff > 0) {
LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
<< " to " << (Defaults.size() + StageDiff) << "\n");
// If we need more phis than we have defaults for, pad out with undefs for
// the earliest phis, which are at the end of the defaults chain (the
// chain is in reverse order).
Defaults.resize(Defaults.size() + StageDiff,
Defaults.empty() ? std::optional<Register>()
: Defaults.back());
}
}
// Now we know the number of stages to jump back, insert the phi chain.
auto DefaultI = Defaults.rbegin();
while (DefaultI != Defaults.rend())
LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
if (IllegalPhiDefault) {
// The consumer optionally consumes LoopProducer in the same iteration
// (because the producer is scheduled at an earlier cycle than the consumer)
// or the initial value. To facilitate this we create an illegal block here
// by embedding a phi in the middle of the block. We will fix this up
// immediately prior to pruning.
auto RC = MRI.getRegClass(Reg);
Register R = MRI.createVirtualRegister(RC);
MachineInstr *IllegalPhi =
BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
.addReg(*IllegalPhiDefault)
.addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
.addReg(LoopReg)
.addMBB(BB); // Block choice is arbitrary and has no effect.
// Illegal phi should belong to the producer stage so that it can be
// filtered correctly during peeling.
S.setStage(IllegalPhi, LoopProducerStage);
return R;
}
return LoopReg;
}
Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
const TargetRegisterClass *RC) {
// If the init register is not undef, try and find an existing phi.
if (InitReg) {
auto I = Phis.find({LoopReg, *InitReg});
if (I != Phis.end())
return I->second;
} else {
for (auto &KV : Phis) {
if (KV.first.first == LoopReg)
return KV.second;
}
}
// InitReg is either undef or no existing phi takes InitReg as input. Try and
// find a phi that takes undef as input.
auto I = UndefPhis.find(LoopReg);
if (I != UndefPhis.end()) {
Register R = I->second;
if (!InitReg)
// Found a phi taking undef as input, and this input is undef so return
// without any more changes.
return R;
// Found a phi taking undef as input, so rewrite it to take InitReg.
MachineInstr *MI = MRI.getVRegDef(R);
MI->getOperand(1).setReg(*InitReg);
Phis.insert({{LoopReg, *InitReg}, R});
const TargetRegisterClass *ConstrainRegClass =
MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
assert(ConstrainRegClass && "Expected a valid constrained register class!");
(void)ConstrainRegClass;
UndefPhis.erase(I);
return R;
}
// Failed to find any existing phi to reuse, so create a new one.
if (!RC)
RC = MRI.getRegClass(LoopReg);
Register R = MRI.createVirtualRegister(RC);
if (InitReg) {
const TargetRegisterClass *ConstrainRegClass =
MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
assert(ConstrainRegClass && "Expected a valid constrained register class!");
(void)ConstrainRegClass;
}
BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
.addReg(InitReg ? *InitReg : undef(RC))
.addMBB(PreheaderBB)
.addReg(LoopReg)
.addMBB(BB);
if (!InitReg)
UndefPhis[LoopReg] = R;
else
Phis[{LoopReg, *InitReg}] = R;
return R;
}
Register KernelRewriter::undef(const TargetRegisterClass *RC) {
Register &R = Undefs[RC];
if (R == 0) {
// Create an IMPLICIT_DEF that defines this register if we need it.
// All uses of this should be removed by the time we have finished unrolling
// prologs and epilogs.
R = MRI.createVirtualRegister(RC);
auto *InsertBB = &PreheaderBB->getParent()->front();
BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), R);
}
return R;
}
namespace {
/// Describes an operand in the kernel of a pipelined loop. Characteristics of
/// the operand are discovered, such as how many in-loop PHIs it has to jump
/// through and defaults for these phis.
class KernelOperandInfo {
MachineBasicBlock *BB;
MachineRegisterInfo &MRI;
SmallVector<Register, 4> PhiDefaults;
MachineOperand *Source;
MachineOperand *Target;
public:
KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
: MRI(MRI) {
Source = MO;
BB = MO->getParent()->getParent();
while (isRegInLoop(MO)) {
MachineInstr *MI = MRI.getVRegDef(MO->getReg());
if (MI->isFullCopy()) {
MO = &MI->getOperand(1);
continue;
}
if (!MI->isPHI())
break;
// If this is an illegal phi, don't count it in distance.
if (IllegalPhis.count(MI)) {
MO = &MI->getOperand(3);
continue;
}
Register Default = getInitPhiReg(*MI, BB);
MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
: &MI->getOperand(3);
PhiDefaults.push_back(Default);
}
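// MO now points past the last in-loop phi or copy; PhiDefaults holds one
// init value per legal phi traversed.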
Target = MO;
}
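// Operands compare equal when they jump through the same number of phis;
// the registers themselves may differ between the two kernels.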
bool operator==(const KernelOperandInfo &Other) const {
return PhiDefaults.size() == Other.PhiDefaults.size();
}
void print(raw_ostream &OS) const {
OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
<< *Source->getParent();
}
private:
bool isRegInLoop(MachineOperand *MO) {
return MO->isReg() && MO->getReg().isVirtual() &&
MRI.getVRegDef(MO->getReg())->getParent() == BB;
}
};
} // namespace
MachineBasicBlock *
PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
if (LPD == LPD_Front)
PeeledFront.push_back(NewBB);
else
PeeledBack.push_front(NewBB);
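// Map each instruction in the original block and its clone back to the
// canonical (kernel) instruction, and record the per-block copies so
// registers can later be translated between blocks.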
for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
++I, ++NI) {
CanonicalMIs[&*I] = &*I;
CanonicalMIs[&*NI] = &*I;
BlockMIs[{NewBB, &*I}] = &*NI;
BlockMIs[{BB, &*I}] = &*I;
}
return NewBB;
}
void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
int MinStage) {
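// Walk the block backwards and delete every instruction scheduled before
// MinStage, first rewiring PHI users of its defs to the corresponding
// registers in this block.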
for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
MachineInstr *MI = &*I++;
int Stage = getStage(MI);
if (Stage == -1 || Stage >= MinStage)
continue;
for (MachineOperand &DefMO : MI->defs()) {
SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
// Only PHIs can use values from this block by construction. Rewrite
// each use to this block's equivalent of the PHI's result.
assert(UseMI.isPHI());
Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
MI->getParent());
Subs.emplace_back(&UseMI, Reg);
}
for (auto &Sub : Subs)
Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
*MRI.getTargetRegisterInfo());
}
if (LIS)
LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
}
}
void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
auto InsertPt = DestBB->getFirstNonPHI();
DenseMap<Register, Register> Remaps;
for (MachineInstr &MI : llvm::make_early_inc_range(
llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
if (MI.isPHI()) {
// This is an illegal PHI. If we move any instructions using an illegal
// PHI, we need to create a legal Phi.
if (getStage(&MI) != Stage) {
// The legal Phi is not necessary if the illegal phi's stage
// is being moved.
Register PhiR = MI.getOperand(0).getReg();
auto RC = MRI.getRegClass(PhiR);
Register NR = MRI.createVirtualRegister(RC);
MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
DebugLoc(), TII->get(TargetOpcode::PHI), NR)
.addReg(PhiR)
.addMBB(SourceBB);
BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
CanonicalMIs[NI] = CanonicalMIs[&MI];
Remaps[PhiR] = NR;
}
}
if (getStage(&MI) != Stage)
continue;
MI.removeFromParent();
DestBB->insert(InsertPt, &MI);
auto *KernelMI = CanonicalMIs[&MI];
BlockMIs[{DestBB, KernelMI}] = &MI;
BlockMIs.erase({SourceBB, KernelMI});
}
SmallVector<MachineInstr *, 4> PhiToDelete;
for (MachineInstr &MI : DestBB->phis()) {
assert(MI.getNumOperands() == 3);
MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
// If the instruction referenced by the phi is moved inside the block
// we don't need the phi anymore.
if (getStage(Def) == Stage) {
Register PhiReg = MI.getOperand(0).getReg();
assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
/*TRI=*/nullptr) != -1);
MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
MI.getOperand(0).setReg(PhiReg);
PhiToDelete.push_back(&MI);
}
}
for (auto *P : PhiToDelete)
P->eraseFromParent();
InsertPt = DestBB->getFirstNonPHI();
// Helper to clone Phi instructions into the destination block. We clone
// Phis greedily to avoid combinatorial explosion of Phi instructions.
auto clonePhi = [&](MachineInstr *Phi) {
MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
DestBB->insert(InsertPt, NewMI);
Register OrigR = Phi->getOperand(0).getReg();
Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
NewMI->getOperand(0).setReg(R);
NewMI->getOperand(1).setReg(OrigR);
NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
Remaps[OrigR] = R;
CanonicalMIs[NewMI] = CanonicalMIs[Phi];
BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
return R;
};
for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
for (MachineOperand &MO : I->uses()) {
if (!MO.isReg())
continue;
if (Remaps.count(MO.getReg()))
MO.setReg(Remaps[MO.getReg()]);
else {
// If we are using a phi from the source block we need to add a new phi
// pointing to the old one.
MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
Register R = clonePhi(Use);
MO.setReg(R);
}
}
}
}
}
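/// Follow the loop-carried operand of a canonical phi back through the
/// number of iterations recorded for Phi in PhiNodeLoopIteration, and
/// return the register reached.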
Register
PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
MachineInstr *Phi) {
unsigned distance = PhiNodeLoopIteration[Phi];
MachineInstr *CanonicalUse = CanonicalPhi;
Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
for (unsigned I = 0; I < distance; ++I) {
assert(CanonicalUse->isPHI());
assert(CanonicalUse->getNumOperands() == 5);
unsigned LoopRegIdx = 3, InitRegIdx = 1;
if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
std::swap(LoopRegIdx, InitRegIdx);
CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
}
return CanonicalUseReg;
}
void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
BitVector LS(Schedule.getNumStages(), true);
BitVector AS(Schedule.getNumStages(), true);
LiveStages[BB] = LS;
AvailableStages[BB] = AS;
// Peel out the prologs.
LS.reset();
for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
LS[I] = true;
Prologs.push_back(peelKernel(LPD_Front));
LiveStages[Prologs.back()] = LS;
AvailableStages[Prologs.back()] = LS;
}
// Create a block that will end up as the new loop exiting block (dominated by
// all prologs and epilogs). It will only contain PHIs, in the same order as
// BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
// that the exiting block is a (sub) clone of BB. This in turn gives us the
// property that any value defined in BB but used outside of BB is used by a
// PHI in the exiting block.
MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
// Push out the epilogs, again in reverse order.
// We can't assume anything about the minimum loop trip count at this point,
// so emit a fairly complex epilog.
// We first peel off NumStages - 1 epilogue blocks. Then we remove dead
// stages and reorder instructions based on their stage. If we have 3 stages
// we generate first:
// E0[3, 2, 1]
// E1[3', 2']
// E2[3'']
// And then we move instructions based on their stages to have:
// E0[3]
// E1[2, 3']
// E2[1, 2', 3'']
// The transformation is legal because we only move instructions past
// instructions of a previous loop iteration.
for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
Epilogs.push_back(peelKernel(LPD_Back));
MachineBasicBlock *B = Epilogs.back();
filterInstructions(B, Schedule.getNumStages() - I);
// Keep track of which iteration each phi belongs to. We need it to know
// what version of the variable to use during prologue/epilogue stitching.
EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
for (MachineInstr &Phi : B->phis())
PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
}
for (size_t I = 0; I < Epilogs.size(); I++) {
LS.reset();
for (size_t J = I; J < Epilogs.size(); J++) {
int Iteration = J;
unsigned Stage = Schedule.getNumStages() - 1 + I - J;
// Move stage one block at a time so that Phi nodes are updated correctly.
for (size_t K = Iteration; K > I; K--)
moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
LS[Stage] = true;
}
LiveStages[Epilogs[I]] = LS;
AvailableStages[Epilogs[I]] = AS;
}
// Now we've defined all the prolog and epilog blocks as a fallthrough
// sequence, add the edges that will be followed if the loop trip count is
// lower than the number of stages (connecting prologs directly with epilogs).
auto PI = Prologs.begin();
auto EI = Epilogs.begin();
assert(Prologs.size() == Epilogs.size());
for (; PI != Prologs.end(); ++PI, ++EI) {
MachineBasicBlock *Pred = *(*EI)->pred_begin();
(*PI)->addSuccessor(*EI);
for (MachineInstr &MI : (*EI)->phis()) {
Register Reg = MI.getOperand(1).getReg();
MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
if (Use && Use->getParent() == Pred) {
MachineInstr *CanonicalUse = CanonicalMIs[Use];
if (CanonicalUse->isPHI()) {
// If the use comes from a phi, we need to skip as many phis as the
// distance between the epilogue and the kernel. Trace through the phi
// chain to find the right value.
Reg = getPhiCanonicalReg(CanonicalUse, Use);
}
Reg = getEquivalentRegisterIn(Reg, *PI);
}
MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
MI.addOperand(MachineOperand::CreateMBB(*PI));
}
}
// Create a list of all blocks in order.
SmallVector<MachineBasicBlock *, 8> Blocks;
llvm::copy(PeeledFront, std::back_inserter(Blocks));
Blocks.push_back(BB);
llvm::copy(PeeledBack, std::back_inserter(Blocks));
// Iterate in reverse order over all instructions, remapping as we go.
for (MachineBasicBlock *B : reverse(Blocks)) {
for (auto I = B->instr_rbegin();
I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
MachineBasicBlock::reverse_instr_iterator MI = I++;
rewriteUsesOf(&*MI);
}
}
for (auto *MI : IllegalPhisToDelete) {
if (LIS)
LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
}
IllegalPhisToDelete.clear();
// Now all remapping has been done, we're free to optimize the generated code.
for (MachineBasicBlock *B : reverse(Blocks))
EliminateDeadPhis(B, MRI, LIS);
EliminateDeadPhis(ExitingBB, MRI, LIS);
}
MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
MachineFunction &MF = *BB->getParent();
MachineBasicBlock *Exit = *BB->succ_begin();
if (Exit == BB)
Exit = *std::next(BB->succ_begin());
MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
MF.insert(std::next(BB->getIterator()), NewBB);
// Clone all phis in BB into NewBB and rewrite.
for (MachineInstr &MI : BB->phis()) {
auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
Register OldR = MI.getOperand(3).getReg();
Register R = MRI.createVirtualRegister(RC);
SmallVector<MachineInstr *, 4> Uses;
for (MachineInstr &Use : MRI.use_instructions(OldR))
if (Use.getParent() != BB)
Uses.push_back(&Use);
for (MachineInstr *Use : Uses)
Use->substituteRegister(OldR, R, /*SubIdx=*/0,
*MRI.getTargetRegisterInfo());
MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
.addReg(OldR)
.addMBB(BB);
BlockMIs[{NewBB, &MI}] = NI;
CanonicalMIs[NI] = &MI;
}
BB->replaceSuccessor(Exit, NewBB);
Exit->replacePhiUsesWith(BB, NewBB);
NewBB->addSuccessor(Exit);
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
(void)CanAnalyzeBr;
assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
TII->removeBranch(*BB);
TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
Cond, DebugLoc());
TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
return NewBB;
}
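/// Return the register in BB that corresponds to Reg: look up the
/// block-local copy of Reg's defining instruction and read the operand at
/// the same index.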
Register
PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
MachineBasicBlock *BB) {
MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
}
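/// Rewrite or remove MI after peeling. Illegal phis are folded into their
/// loop-carried (or, if that stage is unavailable, initial) value and
/// queued for deletion; instructions whose stage is not live in their
/// block have their defs forwarded to the equivalent registers and are
/// erased.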
void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
if (MI->isPHI()) {
// This is an illegal PHI. The loop-carried (desired) value is operand 3,
// and it is produced by this block.
Register PhiR = MI->getOperand(0).getReg();
Register R = MI->getOperand(3).getReg();
int RMIStage = getStage(MRI.getUniqueVRegDef(R));
if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
R = MI->getOperand(1).getReg();
MRI.setRegClass(R, MRI.getRegClass(PhiR));
MRI.replaceRegWith(PhiR, R);
// Postpone deleting the Phi as it may be referenced by BlockMIs and used
// later to figure out how to remap registers.
MI->getOperand(0).setReg(PhiR);
IllegalPhisToDelete.push_back(MI);
return;
}
int Stage = getStage(MI);
if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
LiveStages[MI->getParent()].test(Stage))
// Instruction is live, no rewriting to do.
return;
for (MachineOperand &DefMO : MI->defs()) {
SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
// Only PHIs can use values from this block by construction. Rewrite
// each use to this block's equivalent of the PHI's result.
assert(UseMI.isPHI());
Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
MI->getParent());
Subs.emplace_back(&UseMI, Reg);
}
for (auto &Sub : Subs)
Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
*MRI.getTargetRegisterInfo());
}
if (LIS)
LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
}
void PeelingModuloScheduleExpander::fixupBranches() {
// Work outwards from the kernel.
bool KernelDisposed = false;
int TC = Schedule.getNumStages() - 1;
for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
++PI, ++EI, --TC) {
MachineBasicBlock *Prolog = *PI;
MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
MachineBasicBlock *Epilog = *EI;
SmallVector<MachineOperand, 4> Cond;
TII->removeBranch(*Prolog);
std::optional<bool> StaticallyGreater =
LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
if (!StaticallyGreater) {
LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
// Dynamically branch based on Cond.
TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
} else if (*StaticallyGreater == false) {
LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
// Prolog never falls through; branch to epilog and orphan interior
// blocks. Leave it to unreachable-block-elim to clean up.
Prolog->removeSuccessor(Fallthrough);
for (MachineInstr &P : Fallthrough->phis()) {
P.removeOperand(2);
P.removeOperand(1);
}
TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
KernelDisposed = true;
} else {
LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
// Prolog always falls through; remove incoming values in epilog.
Prolog->removeSuccessor(Epilog);
for (MachineInstr &P : Epilog->phis()) {
P.removeOperand(4);
P.removeOperand(3);
}
}
}
if (!KernelDisposed) {
LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
LoopInfo->setPreheader(Prologs.back());
} else {
LoopInfo->disposed();
}
}
void PeelingModuloScheduleExpander::rewriteKernel() {
KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
KR.rewrite();
}
void PeelingModuloScheduleExpander::expand() {
BB = Schedule.getLoop()->getTopBlock();
Preheader = Schedule.getLoop()->getLoopPreheader();
LLVM_DEBUG(Schedule.dump());
LoopInfo = TII->analyzeLoopForPipelining(BB);
assert(LoopInfo);
rewriteKernel();
peelPrologAndEpilogs();
fixupBranches();
}
void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
BB = Schedule.getLoop()->getTopBlock();
Preheader = Schedule.getLoop()->getLoopPreheader();
// Dump the schedule before we invalidate and remap all its instructions.
// Stash it in a string so we can print it if we found an error.
std::string ScheduleDump;
raw_string_ostream OS(ScheduleDump);
Schedule.print(OS);
OS.flush();
// First, run the normal ModuloScheduleExpander. We don't support any
// InstrChanges.
assert(LIS && "Requires LiveIntervals!");
ModuloScheduleExpander MSE(MF, Schedule, *LIS,
ModuloScheduleExpander::InstrChangesTy());
MSE.expand();
MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
if (!ExpandedKernel) {
// The expander optimized away the kernel. We can't do any useful checking.
MSE.cleanup();
return;
}
// Before running the KernelRewriter, re-add BB into the CFG.
Preheader->addSuccessor(BB);
// Now run the new expansion algorithm.
KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
KR.rewrite();
peelPrologAndEpilogs();
// Collect all illegal phis that the new algorithm created. We'll give these
// to KernelOperandInfo.
SmallPtrSet<MachineInstr *, 4> IllegalPhis;
for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
if (NI->isPHI())
IllegalPhis.insert(&*NI);
}
// Co-iterate across both kernels. We expect them to be identical apart from
// phis and full COPYs (we look through both).
SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
auto OI = ExpandedKernel->begin();
auto NI = BB->begin();
for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
while (OI->isPHI() || OI->isFullCopy())
++OI;
while (NI->isPHI() || NI->isFullCopy())
++NI;
assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
// Analyze every operand separately.
for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
OOpI != OI->operands_end(); ++OOpI, ++NOpI)
KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
}
bool Failed = false;
for (auto &OldAndNew : KOIs) {
if (OldAndNew.first == OldAndNew.second)
continue;
Failed = true;
errs() << "Modulo kernel validation error: [\n";
errs() << " [golden] ";
OldAndNew.first.print(errs());
errs() << " ";
OldAndNew.second.print(errs());
errs() << "]\n";
}
if (Failed) {
errs() << "Golden reference kernel:\n";
ExpandedKernel->print(errs());
errs() << "New kernel:\n";
BB->print(errs());
errs() << ScheduleDump;
report_fatal_error(
"Modulo kernel validation (-pipeliner-experimental-cg) failed");
}
// Cleanup by removing BB from the CFG again as the original
// ModuloScheduleExpander intended.
Preheader->removeSuccessor(BB);
MSE.cleanup();
}
MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
// TODO: Offset information needs to be corrected.
NewMI->dropMemRefs(MF);
return NewMI;
}
/// Create a dedicated exit for Loop. Exit is the original exit for Loop.
/// If it is already a dedicated exit, return it. Otherwise, insert a new
/// block between them and return the new block.
static MachineBasicBlock *createDedicatedExit(MachineBasicBlock *Loop,
MachineBasicBlock *Exit) {
if (Exit->pred_size() == 1)
return Exit;
MachineFunction *MF = Loop->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineBasicBlock *NewExit =
MF->CreateMachineBasicBlock(Loop->getBasicBlock());
MF->insert(Loop->getIterator(), NewExit);
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
TII->analyzeBranch(*Loop, TBB, FBB, Cond);
if (TBB == Loop)
FBB = NewExit;
else if (FBB == Loop)
TBB = NewExit;
else
llvm_unreachable("unexpected loop structure");
TII->removeBranch(*Loop);
TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
Loop->replaceSuccessor(Exit, NewExit);
TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
NewExit->addSuccessor(Exit);
Exit->replacePhiUsesWith(Loop, NewExit);
return NewExit;
}
/// Insert branch code into the end of MBB. It branches to GreaterThan if the
/// remaining trip count for instructions in LastStage0Insts is greater than
/// RequiredTC, and to Otherwise otherwise.
void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
int RequiredTC,
InstrMapTy &LastStage0Insts,
MachineBasicBlock &GreaterThan,
MachineBasicBlock &Otherwise) {
SmallVector<MachineOperand, 4> Cond;
LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
LastStage0Insts);
if (SwapBranchTargetsMVE) {
// Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
// FBB for optimal performance.
if (TII->reverseBranchCondition(Cond))
llvm_unreachable("can not reverse branch condition");
TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
} else {
TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
}
}
/// Generate a pipelined loop that is unrolled using the MVE algorithm, and any
/// other necessary blocks. The control flow is modified to execute the
/// pipelined loop if the trip count satisfies the condition, otherwise the
/// original loop. The original loop is also used to execute the remainder
/// iterations which occur due to unrolling.
void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
// The control flow for pipelining with MVE:
//
// OrigPreheader:
// // The block that is originally the loop preheader
// goto Check
//
// Check:
// // Check whether the trip count satisfies the requirements to pipeline.
// if (LoopCounter > NumStages + NumUnroll - 2)
// // The minimum number of iterations to pipeline =
// // iterations executed in prolog/epilog (NumStages-1) +
// // iterations executed in one kernel run (NumUnroll)
// goto Prolog
// // fallback to the original loop
// goto NewPreheader
//
// Prolog:
// // All prolog stages. There are no direct branches to the epilogue.
// goto NewKernel
//
// NewKernel:
// // NumUnroll copies of the kernel
// if (LoopCounter > MVE-1)
// goto NewKernel
// goto Epilog
//
// Epilog:
// // All epilog stages.
// if (LoopCounter > 0)
// // The remainder is executed in the original loop
// goto NewPreheader
// goto NewExit
//
// NewPreheader:
// // Newly created preheader for the original loop.
// // The initial values of the phis in the loop are merged from two paths.
// NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
// goto OrigKernel
//
// OrigKernel:
// // The original loop block.
// if (LoopCounter != 0)
// goto OrigKernel
// goto NewExit
//
// NewExit:
// // Newly created dedicated exit for the original loop.
// // Merge values which are referenced after the loop
// Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
// goto OrigExit
//
// OrigExit:
// // The block that is originally the loop exit.
// // If it is already a dedicated exit, NewExit is not created.
// An example of where each stage is executed:
// Assume #Stages 3, #MVE 4, #Iterations 12
// Iter 0 1 2 3 4 5 6 7 8 9 10-11
// -------------------------------------------------
// Stage 0 Prolog#0
// Stage 1 0 Prolog#1
// Stage 2 1 0 Kernel Unroll#0 Iter#0
// Stage 2 1 0 Kernel Unroll#1 Iter#0
// Stage 2 1 0 Kernel Unroll#2 Iter#0
// Stage 2 1 0 Kernel Unroll#3 Iter#0
// Stage 2 1 0 Kernel Unroll#0 Iter#1
// Stage 2 1 0 Kernel Unroll#1 Iter#1
// Stage 2 1 0 Kernel Unroll#2 Iter#1
// Stage 2 1 0 Kernel Unroll#3 Iter#1
// Stage 2 1 Epilog#0
// Stage 2 Epilog#1
// Stage 0-2 OrigKernel
LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
assert(LoopInfo && "Must be able to analyze loop!");
calcNumUnroll();
Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
MF.insert(OrigKernel->getIterator(), Check);
MF.insert(OrigKernel->getIterator(), Prolog);
MF.insert(OrigKernel->getIterator(), NewKernel);
MF.insert(OrigKernel->getIterator(), Epilog);
MF.insert(OrigKernel->getIterator(), NewPreheader);
NewExit = createDedicatedExit(OrigKernel, OrigExit);
NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
OrigPreheader->addSuccessor(Check);
TII->removeBranch(*OrigPreheader);
TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
Check->addSuccessor(Prolog);
Check->addSuccessor(NewPreheader);
Prolog->addSuccessor(NewKernel);
NewKernel->addSuccessor(NewKernel);
NewKernel->addSuccessor(Epilog);
Epilog->addSuccessor(NewPreheader);
Epilog->addSuccessor(NewExit);
InstrMapTy LastStage0Insts;
insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
LastStage0Insts, *Prolog, *NewPreheader);
// VRMaps map (prolog/kernel/epilog phase#, original register#) to new
// register#
SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
generateProlog(PrologVRMap);
generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
}
/// Replace MI's use operands according to the maps.
void ModuloScheduleExpanderMVE::updateInstrUse(
MachineInstr *MI, int StageNum, int PhaseNum,
SmallVectorImpl<ValueMapTy> &CurVRMap,
SmallVectorImpl<ValueMapTy> *PrevVRMap) {
// If MI is in the prolog/kernel/epilog block, CurVRMap is
// PrologVRMap/KernelVRMap/EpilogVRMap respectively.
// PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
// Refer to the appropriate map according to the stage difference between
// MI and the definition of an operand.
for (MachineOperand &UseMO : MI->uses()) {
if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
continue;
int DiffStage = 0;
Register OrigReg = UseMO.getReg();
MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
if (!DefInst || DefInst->getParent() != OrigKernel)
continue;
unsigned InitReg = 0;
unsigned DefReg = OrigReg;
if (DefInst->isPHI()) {
++DiffStage;
unsigned LoopReg;
getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
// LoopReg is guaranteed to be defined within the loop by canApply()
DefReg = LoopReg;
DefInst = MRI.getVRegDef(LoopReg);
}
unsigned DefStageNum = Schedule.getStage(DefInst);
DiffStage += StageNum - DefStageNum;
Register NewReg;
if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
// NewReg is defined in a previous phase of the same block
NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
else if (!PrevVRMap)
// Since this is the first iteration, refer to the initial register of the
// loop
NewReg = InitReg;
else
// Cases where DiffStage is larger than PhaseNum.
// If MI is in the kernel block, the value is defined by the previous
// iteration and PhiVRMap is referenced. If MI is in the epilog block, the
// value is defined in the kernel block and KernelVRMap is referenced.
NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
const TargetRegisterClass *NRC =
MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
if (NRC)
UseMO.setReg(NewReg);
else {
Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
SplitReg)
.addReg(NewReg);
UseMO.setReg(SplitReg);
}
}
}
/// Return the phi, if one exists, that uses Reg as its loop-carried value.
/// canApply() guarantees that at most one such phi exists.
static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) {
for (MachineInstr &Phi : Loop->phis()) {
unsigned InitVal, LoopVal;
getPhiRegs(Phi, Loop, InitVal, LoopVal);
if (LoopVal == Reg)
return &Phi;
}
return nullptr;
}
/// Generate phis for registers defined by OrigMI.
void ModuloScheduleExpanderMVE::generatePhi(
MachineInstr *OrigMI, int UnrollNum,
SmallVectorImpl<ValueMapTy> &PrologVRMap,
SmallVectorImpl<ValueMapTy> &KernelVRMap,
SmallVectorImpl<ValueMapTy> &PhiVRMap) {
int StageNum = Schedule.getStage(OrigMI);
bool UsePrologReg;
if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
UsePrologReg = true;
else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
UsePrologReg = false;
else
return;
// Examples that show which stages are merged by phi.
// Meaning of the symbol following the stage number:
// a/b: Stages with the same letter are merged (UsePrologReg == true)
// +: Merged with the initial value (UsePrologReg == false)
// *: No phis required
//
// #Stages 3, #MVE 4
// Iter 0 1 2 3 4 5 6 7 8
// -----------------------------------------
// Stage 0a Prolog#0
// Stage 1a 0b Prolog#1
// Stage 2* 1* 0* Kernel Unroll#0
// Stage 2* 1* 0+ Kernel Unroll#1
// Stage 2* 1+ 0a Kernel Unroll#2
// Stage 2+ 1a 0b Kernel Unroll#3
//
// #Stages 3, #MVE 2
// Iter 0 1 2 3 4 5 6 7 8
// -----------------------------------------
// Stage 0a Prolog#0
// Stage 1a 0b Prolog#1
// Stage 2* 1+ 0a Kernel Unroll#0
// Stage 2+ 1a 0b Kernel Unroll#1
//
// #Stages 3, #MVE 1
// Iter 0 1 2 3 4 5 6 7 8
// -----------------------------------------
// Stage 0* Prolog#0
// Stage 1a 0b Prolog#1
// Stage 2+ 1a 0b Kernel Unroll#0
for (MachineOperand &DefMO : OrigMI->defs()) {
if (!DefMO.isReg() || DefMO.isDead())
continue;
Register OrigReg = DefMO.getReg();
auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
if (NewReg == KernelVRMap[UnrollNum].end())
continue;
Register CorrespondReg;
if (UsePrologReg) {
int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
CorrespondReg = PrologVRMap[PrologNum][OrigReg];
} else {
MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
if (!Phi)
continue;
CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
}
assert(CorrespondReg.isValid());
Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
TII->get(TargetOpcode::PHI), PhiReg)
.addReg(NewReg->second)
.addMBB(NewKernel)
.addReg(CorrespondReg)
.addMBB(Prolog);
PhiVRMap[UnrollNum][OrigReg] = PhiReg;
}
}
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
MachineBasicBlock *NewMBB) {
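// Note: machine PHI operands are laid out as <def>, <reg0>, <mbb0>, <reg1>,
// <mbb1>, ..., so the loop below starts at index 1 and steps by 2 to visit
// each (register, predecessor block) pair.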
for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
if (Phi.getOperand(Idx).getReg() == OrigReg) {
Phi.getOperand(Idx).setReg(NewReg);
Phi.getOperand(Idx + 1).setMBB(NewMBB);
return;
}
}
}
/// Generate phis that merge values from multiple routes
void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
Register NewReg) {
SmallVector<MachineOperand *> UsesAfterLoop;
SmallVector<MachineInstr *> LoopPhis;
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
E = MRI.use_end();
I != E; ++I) {
MachineOperand &O = *I;
if (O.getParent()->getParent() != OrigKernel &&
O.getParent()->getParent() != Prolog &&
O.getParent()->getParent() != NewKernel &&
O.getParent()->getParent() != Epilog)
UsesAfterLoop.push_back(&O);
if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
LoopPhis.push_back(O.getParent());
}
// Merge the route that only executes the pipelined loop (when there are no
// remaining iterations) with the route that executes the original loop.
if (!UsesAfterLoop.empty()) {
Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
TII->get(TargetOpcode::PHI), PhiReg)
.addReg(OrigReg)
.addMBB(OrigKernel)
.addReg(NewReg)
.addMBB(Epilog);
for (MachineOperand *MO : UsesAfterLoop)
MO->setReg(PhiReg);
if (!LIS.hasInterval(PhiReg))
LIS.createEmptyInterval(PhiReg);
}
// Merge the route coming from the pipelined loop with the route that
// bypassed it, before entering the original loop.
if (!LoopPhis.empty()) {
for (MachineInstr *Phi : LoopPhis) {
unsigned InitReg, LoopReg;
getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(), Phi->getDebugLoc(),
TII->get(TargetOpcode::PHI), NewInit)
.addReg(InitReg)
.addMBB(Check)
.addReg(NewReg)
.addMBB(Epilog);
replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
}
}
}
void ModuloScheduleExpanderMVE::generateProlog(
SmallVectorImpl<ValueMapTy> &PrologVRMap) {
PrologVRMap.clear();
PrologVRMap.resize(Schedule.getNumStages() - 1);
DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
++PrologNum) {
for (MachineInstr *MI : Schedule.getInstructions()) {
if (MI->isPHI())
continue;
int StageNum = Schedule.getStage(MI);
if (StageNum > PrologNum)
continue;
MachineInstr *NewMI = cloneInstr(MI);
updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
NewMIMap[NewMI] = {PrologNum, StageNum};
Prolog->push_back(NewMI);
}
}
for (auto I : NewMIMap) {
MachineInstr *MI = I.first;
int PrologNum = I.second.first;
int StageNum = I.second.second;
updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
}
LLVM_DEBUG({
dbgs() << "prolog:\n";
Prolog->dump();
});
}
void ModuloScheduleExpanderMVE::generateKernel(
SmallVectorImpl<ValueMapTy> &PrologVRMap,
SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
KernelVRMap.clear();
KernelVRMap.resize(NumUnroll);
SmallVector<ValueMapTy> PhiVRMap;
PhiVRMap.resize(NumUnroll);
DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
DenseMap<MachineInstr *, MachineInstr *> MIMapLastStage0;
for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
for (MachineInstr *MI : Schedule.getInstructions()) {
if (MI->isPHI())
continue;
int StageNum = Schedule.getStage(MI);
MachineInstr *NewMI = cloneInstr(MI);
if (UnrollNum == NumUnroll - 1)
LastStage0Insts[MI] = NewMI;
updateInstrDef(NewMI, KernelVRMap[UnrollNum],
(UnrollNum == NumUnroll - 1 && StageNum == 0));
generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
NewMIMap[NewMI] = {UnrollNum, StageNum};
NewKernel->push_back(NewMI);
}
}
for (auto I : NewMIMap) {
MachineInstr *MI = I.first;
int UnrollNum = I.second.first;
int StageNum = I.second.second;
updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
}
// If the remaining trip count is greater than NumUnroll-1, the loop continues.
insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
*Epilog);
LLVM_DEBUG({
dbgs() << "kernel:\n";
NewKernel->dump();
});
}
void ModuloScheduleExpanderMVE::generateEpilog(
SmallVectorImpl<ValueMapTy> &KernelVRMap,
SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
EpilogVRMap.clear();
EpilogVRMap.resize(Schedule.getNumStages() - 1);
DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
++EpilogNum) {
for (MachineInstr *MI : Schedule.getInstructions()) {
if (MI->isPHI())
continue;
int StageNum = Schedule.getStage(MI);
if (StageNum <= EpilogNum)
continue;
MachineInstr *NewMI = cloneInstr(MI);
updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
NewMIMap[NewMI] = {EpilogNum, StageNum};
Epilog->push_back(NewMI);
}
}
for (auto I : NewMIMap) {
MachineInstr *MI = I.first;
int EpilogNum = I.second.first;
int StageNum = I.second.second;
updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
}
// If there are remaining iterations, they are executed in the original loop.
// Instructions related to loop control, such as the loop counter comparison,
// are indicated by shouldIgnoreForPipelining() and are assumed to be placed
// in stage 0. Thus, the map refers to the instructions in the last unrolled
// iteration of the kernel.
insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
LLVM_DEBUG({
dbgs() << "epilog:\n";
Epilog->dump();
});
}
/// Calculate the number of unrolls required and store it in NumUnroll.
void ModuloScheduleExpanderMVE::calcNumUnroll() {
DenseMap<MachineInstr *, unsigned> Inst2Idx;
NumUnroll = 1;
for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
Inst2Idx[Schedule.getInstructions()[I]] = I;
for (MachineInstr *MI : Schedule.getInstructions()) {
if (MI->isPHI())
continue;
int StageNum = Schedule.getStage(MI);
for (const MachineOperand &MO : MI->uses()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
if (DefMI->getParent() != OrigKernel)
continue;
int NumUnrollLocal = 1;
if (DefMI->isPHI()) {
++NumUnrollLocal;
// canApply() guarantees that DefMI is not a phi and is an instruction in
// the loop
DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
}
NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
if (Inst2Idx[MI] <= Inst2Idx[DefMI])
--NumUnrollLocal;
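// Illustrative example: a use in stage 2 of a non-phi value defined in
// stage 0, where the def appears after the use in the instruction order,
// yields NumUnrollLocal = 1 + (2 - 0) - 1 = 2.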
NumUnroll = std::max(NumUnroll, NumUnrollLocal);
}
}
LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
}
/// Create new virtual registers for definitions of NewMI and update NewMI.
/// If the definitions are referenced after the pipelined loop, phis are
/// created to merge with other routes.
void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
ValueMapTy &VRMap,
bool LastDef) {
for (MachineOperand &MO : NewMI->operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef())
continue;
Register Reg = MO.getReg();
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
Register NewReg = MRI.createVirtualRegister(RC);
MO.setReg(NewReg);
VRMap[Reg] = NewReg;
if (LastDef)
mergeRegUsesAfterPipeline(Reg, NewReg);
}
}
void ModuloScheduleExpanderMVE::expand() {
OrigKernel = Schedule.getLoop()->getTopBlock();
OrigPreheader = Schedule.getLoop()->getLoopPreheader();
OrigExit = Schedule.getLoop()->getExitBlock();
LLVM_DEBUG(Schedule.dump());
generatePipelinedLoop();
}
/// Check if ModuloScheduleExpanderMVE can be applied to L
bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
if (!L.getExitBlock()) {
LLVM_DEBUG(
dbgs() << "Can not apply MVE expander: No single exit block.\n";);
return false;
}
MachineBasicBlock *BB = L.getTopBlock();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
// Put some constraints on the operands of the phis to simplify the
// transformation
DenseSet<unsigned> UsedByPhi;
for (MachineInstr &MI : BB->phis()) {
// Registers defined by phis must be used only inside the loop and never be
// used by phis.
for (MachineOperand &MO : MI.defs())
if (MO.isReg())
for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
if (Ref.getParent() != BB || Ref.isPHI()) {
LLVM_DEBUG(dbgs()
<< "Can not apply MVE expander: A phi result is "
"referenced outside of the loop or by phi.\n";);
return false;
}
// A source register from the loop block must be defined inside the loop.
// A register defined inside the loop must be referenced by at most one phi.
unsigned InitVal, LoopVal;
getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
if (!Register(LoopVal).isVirtual() ||
MRI.getVRegDef(LoopVal)->getParent() != BB) {
LLVM_DEBUG(
dbgs() << "Can not apply MVE expander: A phi source value coming "
"from the loop is not defined in the loop.\n";);
return false;
}
if (UsedByPhi.count(LoopVal)) {
LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
"loop is referenced by two or more phis.\n";);
return false;
}
UsedByPhi.insert(LoopVal);
}
return true;
}
//===----------------------------------------------------------------------===//
// ModuloScheduleTestPass implementation
//===----------------------------------------------------------------------===//
// This pass constructs a ModuloSchedule from its module and runs
// ModuloScheduleExpander.
//
// The module is expected to contain a single-block analyzable loop.
// The total order of instructions is taken from the loop as-is.
// Instructions are expected to be annotated with a PostInstrSymbol.
// This PostInstrSymbol must have the following format:
// "Stage=%d Cycle=%d".
//===----------------------------------------------------------------------===//
namespace {
class ModuloScheduleTest : public MachineFunctionPass {
public:
static char ID;
ModuloScheduleTest() : MachineFunctionPass(ID) {
initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
void runOnLoop(MachineFunction &MF, MachineLoop &L);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfoWrapperPass>();
AU.addRequired<LiveIntervalsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
} // namespace
char ModuloScheduleTest::ID = 0;
INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
"Modulo Schedule test pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
"Modulo Schedule test pass", false, false)
bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
for (auto *L : MLI) {
if (L->getTopBlock() != L->getBottomBlock())
continue;
runOnLoop(MF, *L);
return false;
}
return false;
}
static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
std::pair<StringRef, StringRef> StageTokenAndValue =
getToken(StageAndCycle.first, "-");
std::pair<StringRef, StringRef> CycleTokenAndValue =
getToken(StageAndCycle.second, "-");
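// For a symbol such as "Stage-1_Cycle-4", getToken keeps the delimiter in
// the remainder, so StageAndCycle is ("Stage-1", "_Cycle-4"); hence the
// "_Cycle" token expected below.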
if (StageTokenAndValue.first != "Stage" ||
CycleTokenAndValue.first != "_Cycle") {
llvm_unreachable(
"Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
return;
}
StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
}
void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
MachineBasicBlock *BB = L.getTopBlock();
dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
DenseMap<MachineInstr *, int> Cycle, Stage;
std::vector<MachineInstr *> Instrs;
for (MachineInstr &MI : *BB) {
if (MI.isTerminator())
continue;
Instrs.push_back(&MI);
if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
dbgs() << "Parsing post-instr symbol for " << MI;
parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
}
}
ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
std::move(Stage));
ModuloScheduleExpander MSE(
MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
MSE.expand();
MSE.cleanup();
}
//===----------------------------------------------------------------------===//
// ModuloScheduleTestAnnotater implementation
//===----------------------------------------------------------------------===//
void ModuloScheduleTestAnnotater::annotate() {
for (MachineInstr *MI : S.getInstructions()) {
SmallVector<char, 16> SV;
raw_svector_ostream OS(SV);
OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
MI->setPostInstrSymbol(MF, Sym);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index df3d207d85d3..b961d3bb1fec 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1,4381 +1,4385 @@
//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the SelectionDAGISel class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PrintPasses.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "isel"
#define ISEL_DUMP_DEBUG_TYPE DEBUG_TYPE "-dump"
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
STATISTIC(NumFastIselFailLowerArguments,
"Number of entry blocks where fast isel failed to lower arguments");
static cl::opt<int> EnableFastISelAbort(
"fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction selection "
"fails to lower an instruction: 0 disable the abort, 1 will "
"abort but for args, calls and terminators, 2 will also "
"abort for argument lowering, and 3 will never fallback "
"to SelectionDAG."));
static cl::opt<bool> EnableFastISelFallbackReport(
"fast-isel-report-on-fallback", cl::Hidden,
cl::desc("Emit a diagnostic when \"fast\" instruction selection "
"falls back to SelectionDAG."));
static cl::opt<bool>
UseMBPI("use-mbpi",
cl::desc("use Machine Branch Probability Info"),
cl::init(true), cl::Hidden);
#ifndef NDEBUG
static cl::opt<std::string>
FilterDAGBasicBlockName("filter-view-dags", cl::Hidden,
cl::desc("Only display the basic block whose name "
"matches this for all view-*-dags options"));
static cl::opt<bool>
ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before the first "
"dag combine pass"));
static cl::opt<bool>
ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before legalize types"));
static cl::opt<bool>
ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before the post "
"legalize types dag combine pass"));
static cl::opt<bool>
ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before legalize"));
static cl::opt<bool>
ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before the second "
"dag combine pass"));
static cl::opt<bool>
ViewISelDAGs("view-isel-dags", cl::Hidden,
cl::desc("Pop up a window to show isel dags as they are selected"));
static cl::opt<bool>
ViewSchedDAGs("view-sched-dags", cl::Hidden,
cl::desc("Pop up a window to show sched dags as they are processed"));
static cl::opt<bool>
ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
cl::desc("Pop up a window to show SUnit dags after they are processed"));
#else
static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false,
ViewDAGCombineLT = false, ViewLegalizeDAGs = false,
ViewDAGCombine2 = false, ViewISelDAGs = false,
ViewSchedDAGs = false, ViewSUnitDAGs = false;
#endif
#ifndef NDEBUG
#define ISEL_DUMP(X) \
do { \
if (llvm::DebugFlag && \
(isCurrentDebugType(DEBUG_TYPE) || \
(isCurrentDebugType(ISEL_DUMP_DEBUG_TYPE) && MatchFilterFuncName))) { \
X; \
} \
} while (false)
#else
#define ISEL_DUMP(X) do { } while (false)
#endif
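// Example usage (illustrative): ISEL_DUMP(CurDAG->dump()); the body runs only
// when the "isel" debug type, or the filtered "isel-dump" debug type, is
// active.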
//===---------------------------------------------------------------------===//
///
/// RegisterScheduler class - Track the registration of instruction schedulers.
///
//===---------------------------------------------------------------------===//
MachinePassRegistry<RegisterScheduler::FunctionPassCtor>
RegisterScheduler::Registry;
//===---------------------------------------------------------------------===//
///
/// ISHeuristic command line option for instruction schedulers.
///
//===---------------------------------------------------------------------===//
static cl::opt<RegisterScheduler::FunctionPassCtor, false,
RegisterPassParser<RegisterScheduler>>
ISHeuristic("pre-RA-sched",
cl::init(&createDefaultScheduler), cl::Hidden,
cl::desc("Instruction schedulers available (before register"
" allocation):"));
static RegisterScheduler
defaultListDAGScheduler("default", "Best scheduler for the target",
createDefaultScheduler);
static bool dontUseFastISelFor(const Function &Fn) {
// Don't enable FastISel for functions with swiftasync Arguments.
// Debug info on those is reliant on good Argument lowering, and FastISel is
// not capable of lowering the entire function. Mixing the two selectors
// tends to result in poor lowering of Arguments.
return any_of(Fn.args(), [](const Argument &Arg) {
return Arg.hasAttribute(Attribute::AttrKind::SwiftAsync);
});
}
namespace llvm {
//===--------------------------------------------------------------------===//
/// This class is used by SelectionDAGISel to temporarily override
/// the optimization level on a per-function basis.
class OptLevelChanger {
SelectionDAGISel &IS;
CodeGenOptLevel SavedOptLevel;
bool SavedFastISel;
public:
OptLevelChanger(SelectionDAGISel &ISel, CodeGenOptLevel NewOptLevel)
: IS(ISel) {
SavedOptLevel = IS.OptLevel;
SavedFastISel = IS.TM.Options.EnableFastISel;
if (NewOptLevel != SavedOptLevel) {
IS.OptLevel = NewOptLevel;
IS.TM.setOptLevel(NewOptLevel);
LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function "
<< IS.MF->getFunction().getName() << "\n");
LLVM_DEBUG(dbgs() << "\tBefore: -O" << static_cast<int>(SavedOptLevel)
<< " ; After: -O" << static_cast<int>(NewOptLevel)
<< "\n");
if (NewOptLevel == CodeGenOptLevel::None)
IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
}
if (dontUseFastISelFor(IS.MF->getFunction()))
IS.TM.setFastISel(false);
LLVM_DEBUG(
dbgs() << "\tFastISel is "
<< (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
<< "\n");
}
~OptLevelChanger() {
if (IS.OptLevel == SavedOptLevel)
return;
LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function "
<< IS.MF->getFunction().getName() << "\n");
LLVM_DEBUG(dbgs() << "\tBefore: -O" << static_cast<int>(IS.OptLevel)
<< " ; After: -O" << static_cast<int>(SavedOptLevel) << "\n");
IS.OptLevel = SavedOptLevel;
IS.TM.setOptLevel(SavedOptLevel);
IS.TM.setFastISel(SavedFastISel);
}
};
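// OptLevelChanger is meant to be used as a scoped RAII object, e.g.:
//   OptLevelChanger OLC(*Selector, NewOptLevel);
// The saved optimization level and FastISel setting are restored when OLC
// goes out of scope.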
//===--------------------------------------------------------------------===//
/// createDefaultScheduler - This creates an instruction scheduler appropriate
/// for the target.
ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS,
CodeGenOptLevel OptLevel) {
const TargetLowering *TLI = IS->TLI;
const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
// Try first to see if the Target has its own way of selecting a scheduler
if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) {
return SchedulerCtor(IS, OptLevel);
}
if (OptLevel == CodeGenOptLevel::None ||
(ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
TLI->getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::RegPressure)
return createBURRListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::Hybrid)
return createHybridListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::VLIW)
return createVLIWDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::Fast)
return createFastDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::Linearize)
return createDAGLinearizer(IS, OptLevel);
assert(TLI->getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
}
} // end namespace llvm
MachineBasicBlock *
TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
#ifndef NDEBUG
dbgs() << "If a target marks an instruction with "
"'usesCustomInserter', it must implement "
"TargetLowering::EmitInstrWithCustomInserter!\n";
#endif
llvm_unreachable(nullptr);
}
void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
assert(!MI.hasPostISelHook() &&
"If a target marks an instruction with 'hasPostISelHook', "
"it must implement TargetLowering::AdjustInstrPostInstrSelection!");
}
//===----------------------------------------------------------------------===//
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
SelectionDAGISelLegacy::SelectionDAGISelLegacy(
char &ID, std::unique_ptr<SelectionDAGISel> S)
: MachineFunctionPass(ID), Selector(std::move(S)) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
bool SelectionDAGISelLegacy::runOnMachineFunction(MachineFunction &MF) {
// If we already selected that function, we do not need to run SDISel.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Selected))
return false;
// Do some sanity-checking on the command-line options.
if (EnableFastISelAbort && !Selector->TM.Options.EnableFastISel)
report_fatal_error("-fast-isel-abort > 0 requires -fast-isel");
// Decide what flavour of variable location debug-info will be used, before
// we change the optimisation level.
MF.setUseDebugInstrRef(MF.shouldUseDebugInstrRef());
// Reset the target options before resetting the optimization
// level below.
// FIXME: This is a horrible hack and should be processed via
// codegen looking at the optimization level explicitly when
// it wants to look at it.
Selector->TM.resetTargetOptions(MF.getFunction());
// Reset OptLevel to None for optnone functions.
CodeGenOptLevel NewOptLevel = skipFunction(MF.getFunction())
? CodeGenOptLevel::None
: Selector->OptLevel;
Selector->MF = &MF;
OptLevelChanger OLC(*Selector, NewOptLevel);
Selector->initializeAnalysisResults(*this);
return Selector->runOnMachineFunction(MF);
}
SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL)
: TM(tm), FuncInfo(new FunctionLoweringInfo()),
SwiftError(new SwiftErrorValueTracking()),
CurDAG(new SelectionDAG(tm, OL)),
SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError,
OL)),
OptLevel(OL) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoWrapperPassPass(
*PassRegistry::getPassRegistry());
initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
delete CurDAG;
delete SwiftError;
}
void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
CodeGenOptLevel OptLevel = Selector->OptLevel;
if (OptLevel != CodeGenOptLevel::None)
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<StackProtector>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
#ifndef NDEBUG
AU.addRequired<TargetTransformInfoWrapperPass>();
#endif
AU.addRequired<AssumptionCacheTracker>();
if (UseMBPI && OptLevel != CodeGenOptLevel::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
// AssignmentTrackingAnalysis only runs if assignment tracking is enabled for
// the module.
AU.addRequired<AssignmentTrackingAnalysis>();
AU.addPreserved<AssignmentTrackingAnalysis>();
if (OptLevel != CodeGenOptLevel::None)
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
MachineModuleInfo &MMI) {
// Only needed for MSVC
if (!TT.isWindowsMSVCEnvironment())
return;
// If it's already set, nothing to do.
if (MMI.usesMSVCFloatingPoint())
return;
for (const Instruction &I : instructions(F)) {
if (I.getType()->isFPOrFPVectorTy()) {
MMI.setUsesMSVCFloatingPoint(true);
return;
}
for (const auto &Op : I.operands()) {
if (Op->getType()->isFPOrFPVectorTy()) {
MMI.setUsesMSVCFloatingPoint(true);
return;
}
}
}
}
PreservedAnalyses
SelectionDAGISelPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
// If we already selected that function, we do not need to run SDISel.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Selected))
return PreservedAnalyses::all();
// Do some sanity-checking on the command-line options.
if (EnableFastISelAbort && !Selector->TM.Options.EnableFastISel)
report_fatal_error("-fast-isel-abort > 0 requires -fast-isel");
// Decide what flavour of variable location debug-info will be used, before
// we change the optimisation level.
MF.setUseDebugInstrRef(MF.shouldUseDebugInstrRef());
// Reset the target options before resetting the optimization
// level below.
// FIXME: This is a horrible hack and should be processed via
// codegen looking at the optimization level explicitly when
// it wants to look at it.
Selector->TM.resetTargetOptions(MF.getFunction());
Selector->MF = &MF;
// Reset OptLevel to None for optnone functions.
// TODO: Add a function analysis to handle this.
CodeGenOptLevel NewOptLevel = MF.getFunction().hasOptNone()
? CodeGenOptLevel::None
: Selector->OptLevel;
OptLevelChanger OLC(*Selector, NewOptLevel);
Selector->initializeAnalysisResults(MFAM);
Selector->runOnMachineFunction(MF);
return getMachineFunctionPassPreservedAnalyses();
}
void SelectionDAGISel::initializeAnalysisResults(
MachineFunctionAnalysisManager &MFAM) {
auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(*MF)
.getManager();
auto &MAMP = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(*MF);
Function &Fn = MF->getFunction();
#ifndef NDEBUG
FuncName = Fn.getName();
MatchFilterFuncName = isFunctionInPrintList(FuncName);
#else
(void)MatchFilterFuncName;
#endif
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
LibInfo = &FAM.getResult<TargetLibraryAnalysis>(Fn);
GFI = Fn.hasGC() ? &FAM.getResult<GCFunctionAnalysis>(Fn) : nullptr;
ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
AC = &FAM.getResult<AssumptionAnalysis>(Fn);
auto *PSI = MAMP.getCachedResult<ProfileSummaryAnalysis>(*Fn.getParent());
BlockFrequencyInfo *BFI = nullptr;
FAM.getResult<BlockFrequencyAnalysis>(Fn);
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None)
BFI = &FAM.getResult<BlockFrequencyAnalysis>(Fn);
FunctionVarLocs const *FnVarLocs = nullptr;
if (isAssignmentTrackingEnabled(*Fn.getParent()))
FnVarLocs = &FAM.getResult<DebugAssignmentTrackingAnalysis>(Fn);
auto *UA = FAM.getCachedResult<UniformityInfoAnalysis>(Fn);
CurDAG->init(*MF, *ORE, MFAM, LibInfo, UA, PSI, BFI, FnVarLocs);
// Now get the optional analyses if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
// into account). That's unfortunate but OK because it just means we won't
// ask for passes that have been required anyway.
if (UseMBPI && OptLevel != CodeGenOptLevel::None)
FuncInfo->BPI = &FAM.getResult<BranchProbabilityAnalysis>(Fn);
else
FuncInfo->BPI = nullptr;
if (OptLevel != CodeGenOptLevel::None)
AA = &FAM.getResult<AAManager>(Fn);
else
AA = nullptr;
SP = &FAM.getResult<SSPLayoutAnalysis>(Fn);
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
TTI = &FAM.getResult<TargetIRAnalysis>(Fn);
#endif
}
void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
Function &Fn = MF->getFunction();
#ifndef NDEBUG
FuncName = Fn.getName();
MatchFilterFuncName = isFunctionInPrintList(FuncName);
#else
(void)MatchFilterFuncName;
#endif
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
LibInfo = &MFP.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
GFI = Fn.hasGC() ? &MFP.getAnalysis<GCModuleInfo>().getFunctionInfo(Fn)
: nullptr;
ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
AC = &MFP.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(Fn);
auto *PSI = &MFP.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
BlockFrequencyInfo *BFI = nullptr;
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None)
BFI = &MFP.getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
FunctionVarLocs const *FnVarLocs = nullptr;
if (isAssignmentTrackingEnabled(*Fn.getParent()))
FnVarLocs = MFP.getAnalysis<AssignmentTrackingAnalysis>().getResults();
UniformityInfo *UA = nullptr;
if (auto *UAPass = MFP.getAnalysisIfAvailable<UniformityInfoWrapperPass>())
UA = &UAPass->getUniformityInfo();
CurDAG->init(*MF, *ORE, &MFP, LibInfo, UA, PSI, BFI, FnVarLocs);
// Now get the optional analyses if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
// into account). That's unfortunate but OK because it just means we won't
// ask for passes that have been required anyway.
if (UseMBPI && OptLevel != CodeGenOptLevel::None)
FuncInfo->BPI =
&MFP.getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else
FuncInfo->BPI = nullptr;
if (OptLevel != CodeGenOptLevel::None)
AA = &MFP.getAnalysis<AAResultsWrapperPass>().getAAResults();
else
AA = nullptr;
SP = &MFP.getAnalysis<StackProtector>().getLayoutInfo();
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
TTI = &MFP.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
#endif
}
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
SwiftError->setFunction(mf);
const Function &Fn = mf.getFunction();
bool InstrRef = mf.shouldUseDebugInstrRef();
FuncInfo->set(MF->getFunction(), *MF, CurDAG);
ISEL_DUMP(dbgs() << "\n\n\n=== " << FuncName << '\n');
SDB->init(GFI, AA, AC, LibInfo);
MF->setHasInlineAsm(false);
FuncInfo->SplitCSR = false;
// We split CSR if the target supports it for the given function
// and the function has only return exits.
if (OptLevel != CodeGenOptLevel::None && TLI->supportSplitCSR(MF)) {
FuncInfo->SplitCSR = true;
// Collect all the return blocks.
for (const BasicBlock &BB : Fn) {
if (!succ_empty(&BB))
continue;
const Instruction *Term = BB.getTerminator();
if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term))
continue;
// Bail out if the exit block is neither Return nor Unreachable.
FuncInfo->SplitCSR = false;
break;
}
}
MachineBasicBlock *EntryMBB = &MF->front();
if (FuncInfo->SplitCSR)
// This performs initialization so lowering for SplitCSR will be correct.
TLI->initializeSplitCSR(EntryMBB);
SelectAllBasicBlocks(Fn);
if (FastISelFailed && EnableFastISelFallbackReport) {
DiagnosticInfoISelFallback DiagFallback(Fn);
Fn.getContext().diagnose(DiagFallback);
}
// Replace forward-declared registers with the registers containing
// the desired value.
// Note: it is important that this happens **before** the call to
// EmitLiveInCopies, since implementations can skip copies of unused
// registers. If we don't apply the reg fixups before, some registers may
// appear as unused and will be skipped, resulting in bad MI.
MachineRegisterInfo &MRI = MF->getRegInfo();
for (DenseMap<Register, Register>::iterator I = FuncInfo->RegFixups.begin(),
E = FuncInfo->RegFixups.end();
I != E; ++I) {
Register From = I->first;
Register To = I->second;
// If To is also scheduled to be replaced, find what its ultimate
// replacement is.
while (true) {
DenseMap<Register, Register>::iterator J = FuncInfo->RegFixups.find(To);
if (J == E)
break;
To = J->second;
}
// Make sure the new register has a sufficiently constrained register class.
if (From.isVirtual() && To.isVirtual())
MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
// Replacing one register with another won't touch the kill flags.
// We need to conservatively clear the kill flags as a kill on the old
// register might dominate existing uses of the new register.
if (!MRI.use_empty(To))
MRI.clearKillFlags(From);
MRI.replaceRegWith(From, To);
}
// If the first basic block in the function has live ins that need to be
// copied into vregs, emit the copies into the top of the block before
// emitting the code for the block.
const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII);
// Insert copies in the entry block and the return blocks.
if (FuncInfo->SplitCSR) {
SmallVector<MachineBasicBlock*, 4> Returns;
// Collect all the return blocks.
for (MachineBasicBlock &MBB : mf) {
if (!MBB.succ_empty())
continue;
MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
if (Term != MBB.end() && Term->isReturn()) {
Returns.push_back(&MBB);
continue;
}
}
TLI->insertCopiesSplitCSR(EntryMBB, Returns);
}
DenseMap<unsigned, unsigned> LiveInMap;
if (!FuncInfo->ArgDbgValues.empty())
for (std::pair<unsigned, unsigned> LI : RegInfo->liveins())
if (LI.second)
LiveInMap.insert(LI);
// Insert DBG_VALUE instructions for function arguments to the entry block.
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];
assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
"Function parameters should not be described by DBG_VALUE_LIST.");
bool hasFI = MI->getDebugOperand(0).isFI();
Register Reg =
hasFI ? TRI.getFrameRegister(*MF) : MI->getDebugOperand(0).getReg();
if (Reg.isPhysical())
EntryMBB->insert(EntryMBB->begin(), MI);
else {
MachineInstr *Def = RegInfo->getVRegDef(Reg);
if (Def) {
MachineBasicBlock::iterator InsertPos = Def;
// FIXME: VR def may not be in entry block.
Def->getParent()->insert(std::next(InsertPos), MI);
} else
LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg"
<< Register::virtReg2Index(Reg) << "\n");
}
// Don't try and extend through copies in instruction referencing mode.
if (InstrRef)
continue;
// If Reg is live-in then update debug info to track its copy in a vreg.
DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
if (LDI != LiveInMap.end()) {
assert(!hasFI && "There's no handling of frame pointer updating here yet "
"- add if needed");
MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
MachineBasicBlock::iterator InsertPos = Def;
const MDNode *Variable = MI->getDebugVariable();
const MDNode *Expr = MI->getDebugExpression();
DebugLoc DL = MI->getDebugLoc();
bool IsIndirect = MI->isIndirectDebugValue();
if (IsIndirect)
assert(MI->getDebugOffset().getImm() == 0 &&
"DBG_VALUE with nonzero offset");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
"Didn't expect to see a DBG_VALUE_LIST here");
// Def is never a terminator here, so it is ok to increment InsertPos.
BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
IsIndirect, LDI->second, Variable, Expr);
// If this vreg is directly copied into an exported register then
// that COPY instruction also needs a DBG_VALUE, if it is the only
// user of LDI->second.
MachineInstr *CopyUseMI = nullptr;
for (MachineInstr &UseMI : RegInfo->use_instructions(LDI->second)) {
if (UseMI.isDebugValue())
continue;
if (UseMI.isCopy() && !CopyUseMI && UseMI.getParent() == EntryMBB) {
CopyUseMI = &UseMI;
continue;
}
// Otherwise this is another use or second copy use.
CopyUseMI = nullptr;
break;
}
if (CopyUseMI &&
TRI.getRegSizeInBits(LDI->second, MRI) ==
TRI.getRegSizeInBits(CopyUseMI->getOperand(0).getReg(), MRI)) {
// Use MI's debug location, which describes where Variable was
// declared, rather than whatever is attached to CopyUseMI.
MachineInstr *NewMI =
BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
CopyUseMI->getOperand(0).getReg(), Variable, Expr);
MachineBasicBlock::iterator Pos = CopyUseMI;
EntryMBB->insertAfter(Pos, NewMI);
}
}
}
// For debug-info, in instruction referencing mode, we need to perform some
// post-isel maintenance.
if (MF->useDebugInstrRef())
MF->finalizeDebugInstrRefs();
// Determine if there are any calls in this machine function.
MachineFrameInfo &MFI = MF->getFrameInfo();
for (const auto &MBB : *MF) {
if (MFI.hasCalls() && MF->hasInlineAsm())
break;
for (const auto &MI : MBB) {
const MCInstrDesc &MCID = TII->get(MI.getOpcode());
if ((MCID.isCall() && !MCID.isReturn()) ||
MI.isStackAligningInlineAsm()) {
MFI.setHasCalls(true);
}
if (MI.isInlineAsm()) {
MF->setHasInlineAsm(true);
}
}
}
// Determine if floating point is used for MSVC
computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI());
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
ISEL_DUMP(dbgs() << "*** MachineFunction at end of ISel ***\n");
ISEL_DUMP(MF->print(dbgs()));
return true;
}
static void reportFastISelFailure(MachineFunction &MF,
OptimizationRemarkEmitter &ORE,
OptimizationRemarkMissed &R,
bool ShouldAbort) {
// Print the function name explicitly if we don't have a debug location (which
// makes the diagnostic less useful) or if we're going to emit a raw error.
if (!R.getLocation().isValid() || ShouldAbort)
R << (" (in function: " + MF.getName() + ")").str();
if (ShouldAbort)
report_fatal_error(Twine(R.getMsg()));
ORE.emit(R);
LLVM_DEBUG(dbgs() << R.getMsg() << "\n");
}
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
// Allow creating illegal types during DAG building for the basic block.
CurDAG->NewNodesMustHaveLegalTypes = false;
// Lower the instructions. If a call is emitted as a tail call, cease emitting
// nodes for this block. If an instruction is elided, don't emit it, but do
// handle any debug-info attached to it.
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
if (!ElidedArgCopyInstrs.count(&*I))
SDB->visit(*I);
else
SDB->visitDbgInfo(*I);
}
// Make sure the root of the DAG is up-to-date.
CurDAG->setRoot(SDB->getControlRoot());
HadTailCall = SDB->HasTailCall;
SDB->resolveOrClearDbgInfo();
SDB->clear();
// Final step, emit the lowered DAG as machine code.
CodeGenAndEmitDAG();
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode *, 16> Added;
SmallVector<SDNode*, 128> Worklist;
Worklist.push_back(CurDAG->getRoot().getNode());
Added.insert(CurDAG->getRoot().getNode());
KnownBits Known;
do {
SDNode *N = Worklist.pop_back_val();
// Otherwise, add all chain operands to the worklist.
for (const SDValue &Op : N->op_values())
if (Op.getValueType() == MVT::Other && Added.insert(Op.getNode()).second)
Worklist.push_back(Op.getNode());
// If this is a CopyToReg with a vreg dest, process it.
if (N->getOpcode() != ISD::CopyToReg)
continue;
unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
if (!Register::isVirtualRegister(DestReg))
continue;
// Ignore non-integer values.
SDValue Src = N->getOperand(2);
EVT SrcVT = Src.getValueType();
if (!SrcVT.isInteger())
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
Known = CurDAG->computeKnownBits(Src);
FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known);
} while (!Worklist.empty());
}
void SelectionDAGISel::CodeGenAndEmitDAG() {
StringRef GroupName = "sdag";
StringRef GroupDescription = "Instruction Selection and Scheduling";
std::string BlockName;
bool MatchFilterBB = false;
(void)MatchFilterBB;
// Pre-type legalization allows creation of any node types.
CurDAG->NewNodesMustHaveLegalTypes = false;
#ifndef NDEBUG
MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
FilterDAGBasicBlockName ==
FuncInfo->MBB->getBasicBlock()->getName());
#endif
#ifdef NDEBUG
if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewDAGCombineLT ||
ViewLegalizeDAGs || ViewDAGCombine2 || ViewISelDAGs || ViewSchedDAGs ||
ViewSUnitDAGs)
#endif
{
BlockName =
(MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
ISEL_DUMP(dbgs() << "\nInitial selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombine1 && MatchFilterBB)
CurDAG->viewGraph("dag-combine1 input for " + BlockName);
// Run the DAG combiner in pre-legalize mode.
{
NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
ISEL_DUMP(dbgs() << "\nOptimized lowered selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
if (ViewLegalizeTypesDAGs && MatchFilterBB)
CurDAG->viewGraph("legalize-types input for " + BlockName);
bool Changed;
{
NamedRegionTimer T("legalize_types", "Type Legalization", GroupName,
GroupDescription, TimePassesIsEnabled);
Changed = CurDAG->LegalizeTypes();
}
ISEL_DUMP(dbgs() << "\nType-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
// Only allow creation of legal node types.
CurDAG->NewNodesMustHaveLegalTypes = true;
if (Changed) {
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
// Run the DAG combiner in post-type-legalize mode.
{
NamedRegionTimer T("combine_lt", "DAG Combining after legalize types",
GroupName, GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
ISEL_DUMP(dbgs() << "\nOptimized type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
}
{
NamedRegionTimer T("legalize_vec", "Vector Legalization", GroupName,
GroupDescription, TimePassesIsEnabled);
Changed = CurDAG->LegalizeVectors();
}
if (Changed) {
ISEL_DUMP(dbgs() << "\nVector-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
{
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
GroupDescription, TimePassesIsEnabled);
CurDAG->LegalizeTypes();
}
ISEL_DUMP(dbgs() << "\nVector/type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
// Run the DAG combiner in post-type-legalize mode.
{
NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
GroupName, GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
}
ISEL_DUMP(dbgs() << "\nOptimized vector-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
}
if (ViewLegalizeDAGs && MatchFilterBB)
CurDAG->viewGraph("legalize input for " + BlockName);
{
NamedRegionTimer T("legalize", "DAG Legalization", GroupName,
GroupDescription, TimePassesIsEnabled);
CurDAG->Legalize();
}
ISEL_DUMP(dbgs() << "\nLegalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombine2 && MatchFilterBB)
CurDAG->viewGraph("dag-combine2 input for " + BlockName);
// Run the DAG combiner in post-legalize mode.
{
NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
ISEL_DUMP(dbgs() << "\nOptimized legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
CurDAG->VerifyDAGDivergence();
#endif
if (OptLevel != CodeGenOptLevel::None)
ComputeLiveOutVRegInfo();
if (ViewISelDAGs && MatchFilterBB)
CurDAG->viewGraph("isel input for " + BlockName);
// Third, instruction select all of the operations to machine code, adding the
// code to the MachineBasicBlock.
{
NamedRegionTimer T("isel", "Instruction Selection", GroupName,
GroupDescription, TimePassesIsEnabled);
DoInstructionSelection();
}
ISEL_DUMP(dbgs() << "\nSelected selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
if (ViewSchedDAGs && MatchFilterBB)
CurDAG->viewGraph("scheduler input for " + BlockName);
// Schedule machine code.
ScheduleDAGSDNodes *Scheduler = CreateScheduler();
{
NamedRegionTimer T("sched", "Instruction Scheduling", GroupName,
GroupDescription, TimePassesIsEnabled);
Scheduler->Run(CurDAG, FuncInfo->MBB);
}
if (ViewSUnitDAGs && MatchFilterBB)
Scheduler->viewGraph();
// Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
{
NamedRegionTimer T("emit", "Instruction Creation", GroupName,
GroupDescription, TimePassesIsEnabled);
// FuncInfo->InsertPt is passed by reference and set to the end of the
// scheduled instructions.
LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
}
// If the block was split, make sure we update any references that are used to
// update PHI nodes later on.
if (FirstMBB != LastMBB)
SDB->UpdateSplitBlock(FirstMBB, LastMBB);
// Free the scheduler state.
{
NamedRegionTimer T("cleanup", "Instruction Scheduling Cleanup", GroupName,
GroupDescription, TimePassesIsEnabled);
delete Scheduler;
}
// Free the SelectionDAG state, now that we're finished with it.
CurDAG->clear();
}
namespace {
/// ISelUpdater - helper class to handle updates of the instruction selection
/// graph.
class ISelUpdater : public SelectionDAG::DAGUpdateListener {
SelectionDAG::allnodes_iterator &ISelPosition;
public:
ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
: SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
/// NodeDeleted - Handle nodes deleted from the graph. If the node being
/// deleted is the current ISelPosition node, update ISelPosition.
///
void NodeDeleted(SDNode *N, SDNode *E) override {
if (ISelPosition == SelectionDAG::allnodes_iterator(N))
++ISelPosition;
}
/// NodeInserted - Handle new nodes inserted into the graph: propagate
/// metadata from root nodes that also applies to new nodes, in case the root
/// is later deleted.
void NodeInserted(SDNode *N) override {
SDNode *CurNode = &*ISelPosition;
if (MDNode *MD = DAG.getPCSections(CurNode))
DAG.addPCSections(N, MD);
if (MDNode *MMRA = DAG.getMMRAMetadata(CurNode))
DAG.addMMRAMetadata(N, MMRA);
}
};
} // end anonymous namespace
// This function is used to enforce the topological node id property
// leveraged during instruction selection. Before the selection process all
// nodes are given a non-negative id such that all nodes have a greater id than
// their operands. As this holds transitively we can prune checks that a node
// N is a predecessor of another node M by not recursively checking through
// M's operands if N's ID is larger than M's ID. This significantly improves
// performance of various legality checks (e.g. IsLegalToFold / UpdateChains).
// However, when we fuse multiple nodes into a single node during the
// selection we may induce a predecessor relationship between inputs and
// outputs of distinct nodes being merged, violating the topological property.
// Should a fused node have a successor which has yet to be selected,
// our legality checks would be incorrect. To avoid this we mark all unselected
// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating (x =>
// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M.
// We use bit-negation to more clearly enforce that node id -1 can only be
// achieved by selected nodes. As the conversion is reversible to the original
// Id, topological pruning can still be leveraged when looking for unselected
// nodes. This method is called internally in all ISel replacement related
// functions.
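// Illustrative example: an unselected successor with id 5 is stored as
// -(5+1) = -6; getUninvalidatedNodeId() recovers 5, while only selected
// nodes carry id -1.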
void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
SmallVector<SDNode *, 4> Nodes;
Nodes.push_back(Node);
while (!Nodes.empty()) {
SDNode *N = Nodes.pop_back_val();
for (auto *U : N->uses()) {
auto UId = U->getNodeId();
if (UId > 0) {
InvalidateNodeId(U);
Nodes.push_back(U);
}
}
}
}
// InvalidateNodeId - As explained in EnforceNodeIdInvariant, mark a
// NodeId with the equivalent node id which is invalid for topological
// pruning.
void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
int InvalidId = -(N->getNodeId() + 1);
N->setNodeId(InvalidId);
}
// getUninvalidatedNodeId - get original uninvalidated node id.
int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) {
int Id = N->getNodeId();
if (Id < -1)
return -(Id + 1);
return Id;
}
void SelectionDAGISel::DoInstructionSelection() {
LLVM_DEBUG(dbgs() << "===== Instruction selection begins: "
<< printMBBReference(*FuncInfo->MBB) << " '"
<< FuncInfo->MBB->getName() << "'\n");
PreprocessISelDAG();
// Select target instructions for the DAG.
{
// Number all nodes with a topological order and set DAGSize.
DAGSize = CurDAG->AssignTopologicalOrder();
// Create a dummy node (which is not added to allnodes), that adds
// a reference to the root node, preventing it from being deleted,
// and tracking any changes of the root.
HandleSDNode Dummy(CurDAG->getRoot());
SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode());
++ISelPosition;
// Make sure that ISelPosition gets properly updated when nodes are deleted
// in calls made from this function. New nodes inherit relevant metadata.
ISelUpdater ISU(*CurDAG, ISelPosition);
// The AllNodes list is now topological-sorted. Visit the
// nodes by starting at the end of the list (the root of the
// graph) and proceeding back toward the beginning (the entry
// node).
while (ISelPosition != CurDAG->allnodes_begin()) {
SDNode *Node = &*--ISelPosition;
// Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
// but there are currently some corner cases that it misses. Also, this
// makes it theoretically possible to disable the DAGCombiner.
if (Node->use_empty())
continue;
#ifndef NDEBUG
SmallVector<SDNode *, 4> Nodes;
Nodes.push_back(Node);
while (!Nodes.empty()) {
auto N = Nodes.pop_back_val();
if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0)
continue;
for (const SDValue &Op : N->op_values()) {
if (Op->getOpcode() == ISD::TokenFactor)
Nodes.push_back(Op.getNode());
else {
// We rely on topological ordering of node ids for checking for
// cycles when fusing nodes during selection. All unselected successor
// nodes of an already selected node should have a negative id.
// This assertion will catch such cases. If this assertion triggers
// it is likely you are using DAG-level Value/Node replacement functions
// (versus equivalent ISEL replacement) in backend-specific
// selections. See comment in EnforceNodeIdInvariant for more
// details.
assert(Op->getNodeId() != -1 &&
"Node has already selected predecessor node");
}
}
}
#endif
// When we are using non-default rounding modes or FP exception behavior
// FP operations are represented by StrictFP pseudo-operations. For
// targets that do not (yet) understand strict FP operations directly,
// we convert them to normal FP opcodes instead at this point. This
// will allow them to be handled by existing target-specific instruction
// selectors.
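// For example, on such a target an ISD::STRICT_FADD whose action is Expand
// is mutated into a plain ISD::FADD below.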
if (!TLI->isStrictFPEnabled() && Node->isStrictFPOpcode()) {
// For some opcodes, we need to call TLI->getOperationAction using
// the first operand type instead of the result type. Note that this
// must match what SelectionDAGLegalize::LegalizeOp is doing.
EVT ActionVT;
switch (Node->getOpcode()) {
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT:
case ISD::STRICT_LROUND:
case ISD::STRICT_LLROUND:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
ActionVT = Node->getOperand(1).getValueType();
break;
default:
ActionVT = Node->getValueType(0);
break;
}
if (TLI->getOperationAction(Node->getOpcode(), ActionVT)
== TargetLowering::Expand)
Node = CurDAG->mutateStrictFPToFP(Node);
}
LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
Node->dump(CurDAG));
Select(Node);
}
CurDAG->setRoot(Dummy.getValue());
}
LLVM_DEBUG(dbgs() << "\n===== Instruction selection ends:\n");
PostprocessISelDAG();
}
static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) {
for (const User *U : CPI->users()) {
if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) {
Intrinsic::ID IID = EHPtrCall->getIntrinsicID();
if (IID == Intrinsic::eh_exceptionpointer ||
IID == Intrinsic::eh_exceptioncode)
return true;
}
}
return false;
}
// The wasm.landingpad.index intrinsic associates a landing pad index number
// with a catchpad instruction. Retrieve the landing pad index from the
// intrinsic and store the mapping in the function.
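// For example, EH preparation may emit something like
//   call void @llvm.wasm.landingpad.index(token %catch, i32 0)
// to associate index 0 with this catchpad.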
static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
const CatchPadInst *CPI) {
MachineFunction *MF = MBB->getParent();
// In case of single catch (...), we don't emit LSDA, so we don't need
// this information.
bool IsSingleCatchAllClause =
CPI->arg_size() == 1 &&
cast<Constant>(CPI->getArgOperand(0))->isNullValue();
// catchpads for longjmp use an empty type list, e.g. catchpad within %0 [],
// and they don't need LSDA info.
bool IsCatchLongjmp = CPI->arg_size() == 0;
if (!IsSingleCatchAllClause && !IsCatchLongjmp) {
// Create a mapping from landing pad label to landing pad index.
bool IntrFound = false;
for (const User *U : CPI->users()) {
if (const auto *Call = dyn_cast<IntrinsicInst>(U)) {
Intrinsic::ID IID = Call->getIntrinsicID();
if (IID == Intrinsic::wasm_landingpad_index) {
Value *IndexArg = Call->getArgOperand(1);
int Index = cast<ConstantInt>(IndexArg)->getZExtValue();
MF->setWasmLandingPadIndex(MBB, Index);
IntrFound = true;
break;
}
}
}
assert(IntrFound && "wasm.landingpad.index intrinsic not found!");
(void)IntrFound;
}
}
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
bool SelectionDAGISel::PrepareEHLandingPad() {
MachineBasicBlock *MBB = FuncInfo->MBB;
const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn();
const BasicBlock *LLVMBB = MBB->getBasicBlock();
const TargetRegisterClass *PtrRC =
TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
auto Pers = classifyEHPersonality(PersonalityFn);
// Catchpads have one live-in register, which typically holds the exception
// pointer or code.
if (isFuncletEHPersonality(Pers)) {
if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
if (hasExceptionPointerOrCodeUser(CPI)) {
// Get or create the virtual register to hold the pointer or code. Mark
// the physreg as live-in and copy it into the vreg.
MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
assert(EHPhysReg && "target lacks exception pointer register");
MBB->addLiveIn(EHPhysReg);
unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
TII->get(TargetOpcode::COPY), VReg)
.addReg(EHPhysReg, RegState::Kill);
}
}
return true;
}
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->addLandingPad(MBB);
const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
// If the unwinder does not preserve all registers, ensure that the
// function marks the clobbered registers as used.
const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
if (Pers == EHPersonality::Wasm_CXX) {
if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI()))
mapWasmLandingPadIndex(MBB, CPI);
} else {
// Assign the call site to the landing pad's begin label.
MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
// Mark exception register as live in.
if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
// Mark exception selector register as live in.
if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
}
return true;
}
// Mark and report the IP-to-state mapping for each block when asynchronous
// EH (IsEHa) is enabled.
void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
MachineModuleInfo &MMI = MF->getMMI();
llvm::WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo();
if (!EHInfo)
return;
for (MachineBasicBlock &MBB : *MF) {
const BasicBlock *BB = MBB.getBasicBlock();
int State = EHInfo->BlockToStateMap[BB];
if (BB->getFirstMayFaultInst()) {
// Report an IP range only for blocks that contain a faulting instruction.
auto MBBb = MBB.getFirstNonPHI();
+
+ if (MBBb == MBB.end())
+ continue;
+
MachineInstr *MIb = &*MBBb;
if (MIb->isTerminator())
continue;
// Insert EH Labels
MCSymbol *BeginLabel = MMI.getContext().createTempSymbol();
MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
EHInfo->addIPToStateRange(State, BeginLabel, EndLabel);
BuildMI(MBB, MBBb, SDB->getCurDebugLoc(),
TII->get(TargetOpcode::EH_LABEL))
.addSym(BeginLabel);
auto MBBe = MBB.instr_end();
MachineInstr *MIe = &*(--MBBe);
// insert before the (possibly multiple) terminators
while (MIe->isTerminator())
MIe = &*(--MBBe);
++MBBe;
BuildMI(MBB, MBBe, SDB->getCurDebugLoc(),
TII->get(TargetOpcode::EH_LABEL))
.addSym(EndLabel);
}
}
}
/// isFoldedOrDeadInstruction - Return true if the specified instruction is
/// side-effect free and is either dead or folded into a generated instruction.
/// Return false if it needs to be emitted.
static bool isFoldedOrDeadInstruction(const Instruction *I,
const FunctionLoweringInfo &FuncInfo) {
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
!I->isTerminator() && // Terminators aren't folded.
!isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
!I->isEHPad() && // EH pad instructions aren't folded.
!FuncInfo.isExportedInst(I); // Exported instrs must be computed.
}
static bool processIfEntryValueDbgDeclare(FunctionLoweringInfo &FuncInfo,
const Value *Arg, DIExpression *Expr,
DILocalVariable *Var,
DebugLoc DbgLoc) {
if (!Expr->isEntryValue() || !isa<Argument>(Arg))
return false;
auto ArgIt = FuncInfo.ValueMap.find(Arg);
if (ArgIt == FuncInfo.ValueMap.end())
return false;
Register ArgVReg = ArgIt->getSecond();
// Find the corresponding livein physical register to this argument.
for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
if (VirtReg == ArgVReg) {
// Append an op deref to account for the fact that this is a dbg_declare.
Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
FuncInfo.MF->setVariableDbgInfo(Var, Expr, PhysReg, DbgLoc);
LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var
<< ", Expr=" << *Expr << ", MCRegister=" << PhysReg
<< ", DbgLoc=" << DbgLoc << "\n");
return true;
}
return false;
}
static bool processDbgDeclare(FunctionLoweringInfo &FuncInfo,
const Value *Address, DIExpression *Expr,
DILocalVariable *Var, DebugLoc DbgLoc) {
if (!Address) {
LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *Var
<< " (bad address)\n");
return false;
}
if (processIfEntryValueDbgDeclare(FuncInfo, Address, Expr, Var, DbgLoc))
return true;
MachineFunction *MF = FuncInfo.MF;
const DataLayout &DL = MF->getDataLayout();
assert(Var && "Missing variable");
assert(DbgLoc && "Missing location");
// Look through casts and constant offset GEPs. These mostly come from
// inalloca.
APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0);
Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
// Check if the variable is a static alloca or a byval or inalloca
// argument passed in memory. If it is not, then we will ignore this
// intrinsic and handle this during isel like dbg.value.
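// For example, a dbg.declare(%local, !var, !expr) where %local is a static
// alloca is recorded below as a mapping from !var to the alloca's frame
// index.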
int FI = std::numeric_limits<int>::max();
if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
auto SI = FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
FI = SI->second;
} else if (const auto *Arg = dyn_cast<Argument>(Address))
FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI == std::numeric_limits<int>::max())
return false;
if (Offset.getBoolValue())
Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
Offset.getZExtValue());
LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var
<< ", Expr=" << *Expr << ", FI=" << FI
<< ", DbgLoc=" << DbgLoc << "\n");
MF->setVariableDbgInfo(Var, Expr, FI, DbgLoc);
return true;
}
/// Collect llvm.dbg.declare information. This is done after argument lowering
/// in case the declarations refer to arguments.
static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
for (const auto &I : instructions(*FuncInfo.Fn)) {
const auto *DI = dyn_cast<DbgDeclareInst>(&I);
if (DI && processDbgDeclare(FuncInfo, DI->getAddress(), DI->getExpression(),
DI->getVariable(), DI->getDebugLoc()))
FuncInfo.PreprocessedDbgDeclares.insert(DI);
for (const DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
if (DVR.Type == DbgVariableRecord::LocationType::Declare &&
processDbgDeclare(FuncInfo, DVR.getVariableLocationOp(0),
DVR.getExpression(), DVR.getVariable(),
DVR.getDebugLoc()))
FuncInfo.PreprocessedDVRDeclares.insert(&DVR);
}
}
}
/// Collect single location variable information generated with assignment
/// tracking. This is done after argument lowering in case the declarations
/// refer to arguments.
static void processSingleLocVars(FunctionLoweringInfo &FuncInfo,
FunctionVarLocs const *FnVarLocs) {
for (auto It = FnVarLocs->single_locs_begin(),
End = FnVarLocs->single_locs_end();
It != End; ++It) {
assert(!It->Values.hasArgList() && "Single loc variadic ops not supported");
processDbgDeclare(FuncInfo, It->Values.getVariableLocationOp(0), It->Expr,
FnVarLocs->getDILocalVariable(It->VariableID), It->DL);
}
}
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = nullptr;
if (TM.Options.EnableFastISel) {
LLVM_DEBUG(dbgs() << "Enabling fast-isel\n");
FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
}
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
// Lower arguments up front. An RPO iteration always visits the entry block
// first.
assert(*RPOT.begin() == &Fn.getEntryBlock());
++NumEntryBlocks;
// Set up FuncInfo for ISel. Entry blocks never have PHIs.
FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
FuncInfo->InsertPt = FuncInfo->MBB->begin();
CurDAG->setFunctionLoweringInfo(FuncInfo.get());
if (!FastIS) {
LowerArguments(Fn);
} else {
// See if fast isel can lower the arguments.
FastIS->startNewBlock();
if (!FastIS->lowerArguments()) {
FastISelFailed = true;
// Fast isel failed to lower these arguments
++NumFastIselFailLowerArguments;
OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
Fn.getSubprogram(),
&Fn.getEntryBlock());
R << "FastISel didn't lower all arguments: "
<< ore::NV("Prototype", Fn.getFunctionType());
reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1);
// Use SelectionDAG argument lowering
LowerArguments(Fn);
CurDAG->setRoot(SDB->getControlRoot());
SDB->clear();
CodeGenAndEmitDAG();
}
// If we inserted any instructions at the beginning, make a note of
// where they are, so we can be sure to emit subsequent instructions
// after them.
if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
else
FastIS->setLastLocalValue(nullptr);
}
bool Inserted = SwiftError->createEntriesInEntryBlock(SDB->getCurDebugLoc());
if (FastIS && Inserted)
FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
if (isAssignmentTrackingEnabled(*Fn.getParent())) {
assert(CurDAG->getFunctionVarLocs() &&
"expected AssignmentTrackingAnalysis pass results");
processSingleLocVars(*FuncInfo, CurDAG->getFunctionVarLocs());
} else {
processDbgDeclares(*FuncInfo);
}
// Iterate over all basic blocks in the function.
for (const BasicBlock *LLVMBB : RPOT) {
if (OptLevel != CodeGenOptLevel::None) {
bool AllPredsVisited = true;
for (const BasicBlock *Pred : predecessors(LLVMBB)) {
if (!FuncInfo->VisitedBBs.count(Pred)) {
AllPredsVisited = false;
break;
}
}
if (AllPredsVisited) {
for (const PHINode &PN : LLVMBB->phis())
FuncInfo->ComputePHILiveOutRegInfo(&PN);
} else {
for (const PHINode &PN : LLVMBB->phis())
FuncInfo->InvalidatePHILiveOutRegInfo(&PN);
}
FuncInfo->VisitedBBs.insert(LLVMBB);
}
BasicBlock::const_iterator const Begin =
LLVMBB->getFirstNonPHI()->getIterator();
BasicBlock::const_iterator const End = LLVMBB->end();
BasicBlock::const_iterator BI = End;
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
if (!FuncInfo->MBB)
continue; // Some blocks like catchpads have no code or MBB.
// Insert new instructions after any phi or argument setup code.
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
FuncInfo->ExceptionSelectorVirtReg = 0;
if (LLVMBB->isEHPad())
if (!PrepareEHLandingPad())
continue;
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
if (LLVMBB != &Fn.getEntryBlock())
FastIS->startNewBlock();
unsigned NumFastIselRemaining = std::distance(Begin, End);
// Pre-assign swifterror vregs.
SwiftError->preassignVRegs(FuncInfo->MBB, Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
const Instruction *Inst = &*std::prev(BI);
// If we no longer require this instruction, skip it.
if (isFoldedOrDeadInstruction(Inst, *FuncInfo) ||
ElidedArgCopyInstrs.count(Inst)) {
--NumFastIselRemaining;
FastIS->handleDbgInfo(Inst);
continue;
}
// Bottom-up: reset the insert pos at the top, after any local-value
// instructions.
FastIS->recomputeInsertPt();
// Try to select the instruction with FastISel.
if (FastIS->selectInstruction(Inst)) {
--NumFastIselRemaining;
++NumFastIselSuccess;
FastIS->handleDbgInfo(Inst);
// If fast isel succeeded, skip over all the folded instructions, and
// then see if there is a load right before the selected instructions.
// Try to fold the load if so.
const Instruction *BeforeInst = Inst;
while (BeforeInst != &*Begin) {
BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
if (!isFoldedOrDeadInstruction(BeforeInst, *FuncInfo))
break;
}
if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
BeforeInst->hasOneUse() &&
FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
// If we succeeded, don't re-select the load.
LLVM_DEBUG(dbgs()
<< "FastISel folded load: " << *BeforeInst << "\n");
FastIS->handleDbgInfo(BeforeInst);
BI = std::next(BasicBlock::const_iterator(BeforeInst));
--NumFastIselRemaining;
++NumFastIselSuccess;
}
continue;
}
FastISelFailed = true;
// Then handle certain instructions as single-LLVM-Instruction blocks.
// We cannot separate out GCrelocates to their own blocks since we need
// to keep track of gc-relocates for a particular gc-statepoint. This is
// done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before
// visitGCRelocate.
if (isa<CallInst>(Inst) && !isa<GCStatepointInst>(Inst) &&
!isa<GCRelocateInst>(Inst) && !isa<GCResultInst>(Inst)) {
OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
Inst->getDebugLoc(), LLVMBB);
R << "FastISel missed call";
if (R.isEnabled() || EnableFastISelAbort) {
std::string InstStrStorage;
raw_string_ostream InstStr(InstStrStorage);
InstStr << *Inst;
R << ": " << InstStrStorage;
}
reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 2);
if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
!Inst->use_empty()) {
Register &R = FuncInfo->ValueMap[Inst];
if (!R)
R = FuncInfo->CreateRegs(Inst);
}
bool HadTailCall = false;
MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
SelectBasicBlock(Inst->getIterator(), BI, HadTailCall);
// If the call was emitted as a tail call, we're done with the block.
// We also need to delete any previously emitted instructions.
if (HadTailCall) {
FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end());
--BI;
break;
}
// Recompute NumFastIselRemaining as Selection DAG instruction
// selection may have handled the call, input args, etc.
unsigned RemainingNow = std::distance(Begin, BI);
NumFastIselFailures += NumFastIselRemaining - RemainingNow;
NumFastIselRemaining = RemainingNow;
continue;
}
OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
Inst->getDebugLoc(), LLVMBB);
bool ShouldAbort = EnableFastISelAbort;
if (Inst->isTerminator()) {
// Use a different message for terminator misses.
R << "FastISel missed terminator";
// Don't abort for terminator unless the level is really high
ShouldAbort = (EnableFastISelAbort > 2);
} else {
R << "FastISel missed";
}
if (R.isEnabled() || EnableFastISelAbort) {
std::string InstStrStorage;
raw_string_ostream InstStr(InstStrStorage);
InstStr << *Inst;
R << ": " << InstStrStorage;
}
reportFastISelFailure(*MF, *ORE, R, ShouldAbort);
NumFastIselFailures += NumFastIselRemaining;
break;
}
FastIS->recomputeInsertPt();
}
if (SP->shouldEmitSDCheck(*LLVMBB)) {
bool FunctionBasedInstrumentation =
TLI->getSSPStackGuardCheck(*Fn.getParent());
SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB],
FunctionBasedInstrumentation);
}
if (Begin != BI)
++NumDAGBlocks;
else
++NumFastIselBlocks;
if (Begin != BI) {
// Run SelectionDAG instruction selection on the remainder of the block
// not handled by FastISel. If FastISel is not run, this is the entire
// block.
bool HadTailCall;
SelectBasicBlock(Begin, BI, HadTailCall);
// But if FastISel was run, we already selected some of the block.
// If we emitted a tail-call, we need to delete any previously emitted
// instruction that follows it.
if (FastIS && HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
}
if (FastIS)
FastIS->finishBasicBlock();
FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
ElidedArgCopyInstrs.clear();
}
// Async EH: report block states when the module enables asynchronous EH
// ("eh-asynch").
if (Fn.getParent()->getModuleFlag("eh-asynch"))
reportIPToStateForBlocks(MF);
SP->copyToMachineFrameInfo(MF->getFrameInfo());
SwiftError->propagateVRegs();
delete FastIS;
SDB->clearDanglingDebugInfo();
SDB->SPDescriptor.resetPerFunctionState();
}
void
SelectionDAGISel::FinishBasicBlock() {
LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
<< FuncInfo->PHINodesToUpdate.size() << "\n";
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e;
++i) dbgs()
<< "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first
<< ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
// Next, now that we know the last MBB that the LLVM BB expanded to, update
// PHI nodes in successors.
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
continue;
PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
}
// Handle stack protector.
if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
// The target provides a guard check function. There is no need to
// generate error handling code or to split current basic block.
MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
// Add the load and check to the basic block.
FuncInfo->MBB = ParentMBB;
FuncInfo->InsertPt =
findSplitPointForStackProtector(ParentMBB, *TII);
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
// Clear the Per-BB State.
SDB->SPDescriptor.resetPerBBState();
} else if (SDB->SPDescriptor.shouldEmitStackProtector()) {
MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB();
// Find the split point to split the parent mbb. At the same time copy all
// physical registers used in the tail of parent mbb into virtual registers
// before the split point and back into physical registers after the split
// point. This prevents us needing to deal with Live-ins and many other
// register allocation issues caused by us splitting the parent mbb. The
// register allocator will clean up said virtual copies later on.
MachineBasicBlock::iterator SplitPoint =
findSplitPointForStackProtector(ParentMBB, *TII);
// Splice the terminator of ParentMBB into SuccessMBB.
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
SplitPoint,
ParentMBB->end());
// Add the guard comparison and jump-on-not-equal to the parent BB.
FuncInfo->MBB = ParentMBB;
FuncInfo->InsertPt = ParentMBB->end();
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
// Codegen the failure MBB if we have not codegened it yet.
MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB();
if (FailureMBB->empty()) {
FuncInfo->MBB = FailureMBB;
FuncInfo->InsertPt = FailureMBB->end();
SDB->visitSPDescriptorFailure(SDB->SPDescriptor);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
}
// Clear the Per-BB State.
SDB->SPDescriptor.resetPerBBState();
}
// Lower each BitTestBlock.
for (auto &BTB : SDB->SL->BitTestCases) {
// Lower header first, if it wasn't already lowered
if (!BTB.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
FuncInfo->MBB = BTB.Parent;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
SDB->visitBitTestHeader(BTB, FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
}
BranchProbability UnhandledProb = BTB.Prob;
for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
UnhandledProb -= BTB.Cases[j].ExtraProb;
// Set the current basic block to the mbb we wish to insert the code into
FuncInfo->MBB = BTB.Cases[j].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
// If all cases cover a contiguous range, it is not necessary to jump to
// the default block after the last bit test fails. This is because the
// range check during bit test header creation has guaranteed that every
// case here doesn't go outside the range. In this case, there is no need
// to perform the last bit test, as it will always be true. Instead, make
// the second-to-last bit-test fall through to the target of the last bit
// test, and delete the last bit test.
MachineBasicBlock *NextMBB;
if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// Second-to-last bit-test with contiguous range or omitted range
// check: fall through to the target of the final bit test.
NextMBB = BTB.Cases[j + 1].TargetBB;
} else if (j + 1 == ej) {
// For the last bit test, fall through to Default.
NextMBB = BTB.Default;
} else {
// Otherwise, fall through to the next bit test.
NextMBB = BTB.Cases[j + 1].ThisBB;
}
SDB->visitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j],
FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// Since we're not going to use the final bit test, remove it.
BTB.Cases.pop_back();
break;
}
}
// Update PHI Nodes
for (const std::pair<MachineInstr *, unsigned> &P :
FuncInfo->PHINodesToUpdate) {
MachineInstrBuilder PHI(*MF, P.first);
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// This is "default" BB. We have two jumps to it. From "header" BB and
// from last "case" BB, unless the latter was skipped.
if (PHIBB == BTB.Default) {
PHI.addReg(P.second).addMBB(BTB.Parent);
if (!BTB.ContiguousRange) {
PHI.addReg(P.second).addMBB(BTB.Cases.back().ThisBB);
}
}
// One of "cases" BB.
for (const SwitchCG::BitTestCase &BT : BTB.Cases) {
MachineBasicBlock* cBB = BT.ThisBB;
if (cBB->isSuccessor(PHIBB))
PHI.addReg(P.second).addMBB(cBB);
}
}
}
SDB->SL->BitTestCases.clear();
// If the JumpTable record is filled in, then we need to emit a jump table.
// Updating the PHI nodes is tricky in this case, since we need to determine
// whether the PHI is a successor of the range check MBB or the jump table MBB
for (unsigned i = 0, e = SDB->SL->JTCases.size(); i != e; ++i) {
// Lower header first, if it wasn't already lowered
if (!SDB->SL->JTCases[i].first.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
FuncInfo->MBB = SDB->SL->JTCases[i].first.HeaderBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
SDB->visitJumpTableHeader(SDB->SL->JTCases[i].second,
SDB->SL->JTCases[i].first, FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
}
// Set the current basic block to the mbb we wish to insert the code into
FuncInfo->MBB = SDB->SL->JTCases[i].second.MBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
SDB->visitJumpTable(SDB->SL->JTCases[i].second);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
// Update PHI Nodes
for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
pi != pe; ++pi) {
MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
MachineBasicBlock *PHIBB = PHI->getParent();
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// "default" BB. We can go there only from header BB.
if (PHIBB == SDB->SL->JTCases[i].second.Default)
PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
.addMBB(SDB->SL->JTCases[i].first.HeaderBB);
// JT BB. Just iterate over successors here
if (FuncInfo->MBB->isSuccessor(PHIBB))
PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
}
}
SDB->SL->JTCases.clear();
// If we generated any switch lowering information, build and codegen any
// additional DAGs necessary.
for (unsigned i = 0, e = SDB->SL->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
FuncInfo->MBB = SDB->SL->SwitchCases[i].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Determine the unique successors.
SmallVector<MachineBasicBlock *, 2> Succs;
Succs.push_back(SDB->SL->SwitchCases[i].TrueBB);
if (SDB->SL->SwitchCases[i].TrueBB != SDB->SL->SwitchCases[i].FalseBB)
Succs.push_back(SDB->SL->SwitchCases[i].FalseBB);
// Emit the code. Note that this could result in FuncInfo->MBB being split.
SDB->visitSwitchCase(SDB->SL->SwitchCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
// Remember the last block, now that any splitting is done, for use in
// populating PHI nodes in successors.
MachineBasicBlock *ThisBB = FuncInfo->MBB;
// Handle any PHI nodes in successors of this chunk, as if we were coming
// from the original BB before switch expansion. Note that PHI nodes can
// occur multiple times in PHINodesToUpdate. We have to be very careful to
// handle them the right number of times.
for (MachineBasicBlock *Succ : Succs) {
FuncInfo->MBB = Succ;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// FuncInfo->MBB may have been removed from the CFG if a branch was
// constant folded.
if (ThisBB->isSuccessor(FuncInfo->MBB)) {
for (MachineBasicBlock::iterator
MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end();
MBBI != MBBE && MBBI->isPHI(); ++MBBI) {
MachineInstrBuilder PHI(*MF, MBBI);
// This value for this PHI node is recorded in PHINodesToUpdate.
for (unsigned pn = 0; ; ++pn) {
assert(pn != FuncInfo->PHINodesToUpdate.size() &&
"Didn't find PHI entry!");
if (FuncInfo->PHINodesToUpdate[pn].first == PHI) {
PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB);
break;
}
}
}
}
}
}
SDB->SL->SwitchCases.clear();
}
/// Create the scheduler. If a specific scheduler was specified
/// via the SchedulerRegistry, use it, otherwise select the
/// one preferred by the target.
///
ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
return ISHeuristic(this, OptLevel);
}
//===----------------------------------------------------------------------===//
// Helper functions used by the generated instruction selector.
//===----------------------------------------------------------------------===//
// Calls to these methods are generated by tblgen.
/// CheckAndMask - The isel is trying to match something like (and X, 255). If
/// the dag combiner simplified the 255, we still want to match. RHS is the
/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
/// specified in the .td file (e.g. 255).
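/// For example, if the pattern wants mask 0xFF but the combiner narrowed the
/// AND constant to 0x0F, the match still succeeds when the remaining bits
/// (0xF0) of the LHS are known to be zero.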
bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
int64_t DesiredMaskS) const {
const APInt &ActualMask = RHS->getAPIntValue();
const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
// If the actual mask exactly matches, success!
if (ActualMask == DesiredMask)
return true;
// If the actual AND mask keeps bits the desired mask disallows, this doesn't
// match.
if (!ActualMask.isSubsetOf(DesiredMask))
return false;
// Otherwise, the DAG Combiner may have proven that the value coming in is
// either already zero or is not demanded. Check for known zero input bits.
APInt NeededMask = DesiredMask & ~ActualMask;
if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
return true;
// TODO: check to see if missing bits are just not demanded.
// Otherwise, this pattern doesn't match.
return false;
}
/// CheckOrMask - The isel is trying to match something like (or X, 255). If
/// the dag combiner simplified the 255, we still want to match. RHS is the
/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
/// specified in the .td file (e.g. 255).
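/// For example, if the pattern wants (or X, 255) but the combiner shrank the
/// constant to 240, the match still succeeds when the low four bits of X are
/// already known to be one.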
bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
int64_t DesiredMaskS) const {
const APInt &ActualMask = RHS->getAPIntValue();
const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
// If the actual mask exactly matches, success!
if (ActualMask == DesiredMask)
return true;
// If the actual OR mask sets bits the desired mask disallows, this doesn't
// match.
if (!ActualMask.isSubsetOf(DesiredMask))
return false;
// Otherwise, the DAG Combiner may have proven that the value coming in is
// either already zero or is not demanded. Check for known zero input bits.
APInt NeededMask = DesiredMask & ~ActualMask;
KnownBits Known = CurDAG->computeKnownBits(LHS);
// If all the missing bits in the or are already known to be set, match!
if (NeededMask.isSubsetOf(Known.One))
return true;
// TODO: check to see if missing bits are just not demanded.
// Otherwise, this pattern doesn't match.
return false;
}
/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
/// by tblgen. Others should not call it.
void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
const SDLoc &DL) {
// Change the vector of SDValues into a list of HandleSDNodes, since targets
// such as x86 may call replaceAllUses while matching an address.
std::list<HandleSDNode> Handles;
Handles.emplace_back(Ops[InlineAsm::Op_InputChain]); // 0
Handles.emplace_back(Ops[InlineAsm::Op_AsmString]); // 1
Handles.emplace_back(Ops[InlineAsm::Op_MDNode]); // 2, !srcloc
Handles.emplace_back(
Ops[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
unsigned i = InlineAsm::Op_FirstOperand, e = Ops.size();
if (Ops[e - 1].getValueType() == MVT::Glue)
--e; // Don't process a glue operand if it is here.
while (i != e) {
InlineAsm::Flag Flags(Ops[i]->getAsZExtVal());
if (!Flags.isMemKind() && !Flags.isFuncKind()) {
// Just skip over this operand, copying the operands verbatim.
Handles.insert(Handles.end(), Ops.begin() + i,
Ops.begin() + i + Flags.getNumOperandRegisters() + 1);
i += Flags.getNumOperandRegisters() + 1;
} else {
assert(Flags.getNumOperandRegisters() == 1 &&
"Memory operand with multiple values?");
unsigned TiedToOperand;
if (Flags.isUseOperandTiedToDef(TiedToOperand)) {
// We need the constraint ID from the operand this is tied to.
unsigned CurOp = InlineAsm::Op_FirstOperand;
Flags = InlineAsm::Flag(Ops[CurOp]->getAsZExtVal());
for (; TiedToOperand; --TiedToOperand) {
CurOp += Flags.getNumOperandRegisters() + 1;
Flags = InlineAsm::Flag(Ops[CurOp]->getAsZExtVal());
}
}
// Otherwise, this is a memory operand. Ask the target to select it.
std::vector<SDValue> SelOps;
const InlineAsm::ConstraintCode ConstraintID =
Flags.getMemoryConstraintID();
if (SelectInlineAsmMemoryOperand(Ops[i + 1], ConstraintID, SelOps))
report_fatal_error("Could not match memory address. Inline asm"
" failure!");
// Add this to the output node.
Flags = InlineAsm::Flag(Flags.isMemKind() ? InlineAsm::Kind::Mem
: InlineAsm::Kind::Func,
SelOps.size());
Flags.setMemConstraint(ConstraintID);
Handles.emplace_back(CurDAG->getTargetConstant(Flags, DL, MVT::i32));
Handles.insert(Handles.end(), SelOps.begin(), SelOps.end());
i += 2;
}
}
// Add the glue input back if present.
if (e != Ops.size())
Handles.emplace_back(Ops.back());
Ops.clear();
for (auto &handle : Handles)
Ops.push_back(handle.getValue());
}
/// findGlueUse - Return use of MVT::Glue value produced by the specified
/// SDNode.
///
static SDNode *findGlueUse(SDNode *N) {
unsigned FlagResNo = N->getNumValues()-1;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDUse &Use = I.getUse();
if (Use.getResNo() == FlagResNo)
return Use.getUser();
}
return nullptr;
}
/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path
/// beyond "ImmedUse". We may ignore chains as they are checked separately.
static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
bool IgnoreChains) {
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 16> WorkList;
// Only check if we have non-immediate uses of Def.
if (ImmedUse->isOnlyUserOf(Def))
return false;
// We don't care about paths to Def that go through ImmedUse so mark it
// visited and mark non-def operands as used.
Visited.insert(ImmedUse);
for (const SDValue &Op : ImmedUse->op_values()) {
SDNode *N = Op.getNode();
// Ignore chain deps (they are validated by
// HandleMergeInputChains) and immediate uses
if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
continue;
if (!Visited.insert(N).second)
continue;
WorkList.push_back(N);
}
// Initialize worklist to operands of Root.
if (Root != ImmedUse) {
for (const SDValue &Op : Root->op_values()) {
SDNode *N = Op.getNode();
// Ignore chains (they are validated by HandleMergeInputChains)
if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
continue;
if (!Visited.insert(N).second)
continue;
WorkList.push_back(N);
}
}
return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true);
}
/// IsProfitableToFold - Returns true if it's profitable to fold the specific
/// operand node N of U during instruction selection that starts at Root.
bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
SDNode *Root) const {
if (OptLevel == CodeGenOptLevel::None)
return false;
return N.hasOneUse();
}
/// IsLegalToFold - Returns true if the specific operand node N of
/// U can be folded during instruction selection that starts at Root.
bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
CodeGenOptLevel OptLevel,
bool IgnoreChains) {
if (OptLevel == CodeGenOptLevel::None)
return false;
// If Root can somehow reach N through a path that doesn't contain
// U, then folding N would create a cycle. e.g. In the following
// diagram, Root can reach N through X. If N is folded into Root, then
// X is both a predecessor and a successor of U.
//
// [N*] //
// ^ ^ //
// / \ //
// [U*] [X]? //
// ^ ^ //
// \ / //
// \ / //
// [Root*] //
//
// * indicates nodes to be folded together.
//
// If Root produces glue, then it gets (even more) interesting. Since it
// will be "glued" together with its glue use in the scheduler, we need to
// check if it might reach N.
//
// [N*] //
// ^ ^ //
// / \ //
// [U*] [X]? //
// ^ ^ //
// \ \ //
// \ | //
// [Root*] | //
// ^ | //
// f | //
// | / //
// [Y] / //
// ^ / //
// f / //
// | / //
// [GU] //
//
// If GU (glue use) indirectly reaches N (the load), and Root folds N
// (call it Fold), then X is a predecessor of GU and a successor of
// Fold. But since Fold and GU are glued together, this will create
// a cycle in the scheduling graph.
// If the node has glue, walk down the graph to the "lowest" node in the
// glued set.
EVT VT = Root->getValueType(Root->getNumValues()-1);
while (VT == MVT::Glue) {
SDNode *GU = findGlueUse(Root);
if (!GU)
break;
Root = GU;
VT = Root->getValueType(Root->getNumValues()-1);
// If our query node has a glue result with a use, we've walked up it. If
// the user (which has already been selected) has a chain or indirectly uses
// the chain, HandleMergeInputChains will not consider it. Because of
// this, we cannot ignore chains in this predicate.
IgnoreChains = false;
}
return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
}
void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
SDLoc DL(N);
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops, DL);
const EVT VTs[] = {MVT::Other, MVT::Glue};
SDValue New = CurDAG->getNode(N->getOpcode(), DL, VTs, Ops);
New->setNodeId(-1);
ReplaceUses(N, New.getNode());
CurDAG->RemoveDeadNode(N);
}
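// For the next two selectors, the register name arrives as an MDString
// wrapped in the node's second operand, e.g. from IR such as
//   %sp = call i64 @llvm.read_register.i64(metadata !0) ; !0 = !{!"sp"}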
void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
SDLoc dl(Op);
MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0));
EVT VT = Op->getValueType(0);
LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT();
Register Reg =
TLI->getRegisterByName(RegStr->getString().data(), Ty,
CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyFromReg(
Op->getOperand(0), dl, Reg, Op->getValueType(0));
New->setNodeId(-1);
ReplaceUses(Op, New.getNode());
CurDAG->RemoveDeadNode(Op);
}
void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
SDLoc dl(Op);
MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0));
EVT VT = Op->getOperand(2).getValueType();
LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT();
Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty,
CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyToReg(
Op->getOperand(0), dl, Reg, Op->getOperand(2));
New->setNodeId(-1);
ReplaceUses(Op, New.getNode());
CurDAG->RemoveDeadNode(Op);
}
void SelectionDAGISel::Select_UNDEF(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
}
void SelectionDAGISel::Select_FREEZE(SDNode *N) {
// TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now.
// If FREEZE instruction is added later, the code below must be changed as
// well.
CurDAG->SelectNodeTo(N, TargetOpcode::COPY, N->getValueType(0),
N->getOperand(0));
}
void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::ARITH_FENCE, N->getValueType(0),
N->getOperand(0));
}
void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::MEMBARRIER, N->getValueType(0),
N->getOperand(0));
}
void SelectionDAGISel::Select_CONVERGENCECTRL_ANCHOR(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ANCHOR,
N->getValueType(0));
}
void SelectionDAGISel::Select_CONVERGENCECTRL_ENTRY(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ENTRY,
N->getValueType(0));
}
void SelectionDAGISel::Select_CONVERGENCECTRL_LOOP(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_LOOP,
N->getValueType(0), N->getOperand(0));
}
void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops,
SDValue OpVal, SDLoc DL) {
SDNode *OpNode = OpVal.getNode();
// FrameIndex nodes should have been directly emitted to TargetFrameIndex
// nodes at DAG-construction time.
assert(OpNode->getOpcode() != ISD::FrameIndex);
if (OpNode->getOpcode() == ISD::Constant) {
Ops.push_back(
CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
Ops.push_back(CurDAG->getTargetConstant(OpNode->getAsZExtVal(), DL,
OpVal.getValueType()));
} else {
Ops.push_back(OpVal);
}
}
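// Select_STACKMAP reorders the operands (chain, glue, id, shadow-bytes,
// live vars...) into (id, shadow-bytes, live vars..., chain, glue), the
// layout expected by the STACKMAP machine node.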
void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
SmallVector<SDValue, 32> Ops;
auto *It = N->op_begin();
SDLoc DL(N);
// Stash the chain and glue operands so we can move them to the end.
SDValue Chain = *It++;
SDValue InGlue = *It++;
// <id> operand.
SDValue ID = *It++;
assert(ID.getValueType() == MVT::i64);
Ops.push_back(ID);
// <numShadowBytes> operand.
SDValue Shad = *It++;
assert(Shad.getValueType() == MVT::i32);
Ops.push_back(Shad);
// Live variable operands.
for (; It != N->op_end(); It++)
pushStackMapLiveVariable(Ops, *It, DL);
Ops.push_back(Chain);
Ops.push_back(InGlue);
SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue);
CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops);
}
void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) {
SmallVector<SDValue, 32> Ops;
auto *It = N->op_begin();
SDLoc DL(N);
// Cache arguments that will be moved to the end in the target node.
SDValue Chain = *It++;
std::optional<SDValue> Glue;
if (It->getValueType() == MVT::Glue)
Glue = *It++;
SDValue RegMask = *It++;
// <id> operand.
SDValue ID = *It++;
assert(ID.getValueType() == MVT::i64);
Ops.push_back(ID);
// <numShadowBytes> operand.
SDValue Shad = *It++;
assert(Shad.getValueType() == MVT::i32);
Ops.push_back(Shad);
// Add the callee.
Ops.push_back(*It++);
// Add <numArgs>.
SDValue NumArgs = *It++;
assert(NumArgs.getValueType() == MVT::i32);
Ops.push_back(NumArgs);
// Calling convention.
Ops.push_back(*It++);
// Push the args for the call.
for (uint64_t I = NumArgs->getAsZExtVal(); I != 0; I--)
Ops.push_back(*It++);
// Now push the live variables.
for (; It != N->op_end(); It++)
pushStackMapLiveVariable(Ops, *It, DL);
// Finally, the regmask, chain and (if present) glue are moved to the end.
Ops.push_back(RegMask);
Ops.push_back(Chain);
if (Glue.has_value())
Ops.push_back(*Glue);
SDVTList NodeTys = N->getVTList();
CurDAG->SelectNodeTo(N, TargetOpcode::PATCHPOINT, NodeTys, Ops);
}
/// GetVBR - decode a vbr encoding whose top bit is set.
LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
assert(Val >= 128 && "Not a VBR");
Val &= 127; // Remove first vbr bit.
unsigned Shift = 7;
uint64_t NextBits;
do {
NextBits = MatcherTable[Idx++];
Val |= (NextBits&127) << Shift;
Shift += 7;
} while (NextBits & 128);
return Val;
}
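// For example, the two-byte sequence {0x85, 0x01} decodes to
// (0x85 & 127) | (0x01 << 7) == 133.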
void SelectionDAGISel::Select_JUMP_TABLE_DEBUG_INFO(SDNode *N) {
SDLoc dl(N);
CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Glue,
CurDAG->getTargetConstant(N->getConstantOperandVal(1),
dl, MVT::i64, true));
}
/// When a match is complete, this method updates uses of interior chain results
/// to use the new results.
void SelectionDAGISel::UpdateChains(
SDNode *NodeToMatch, SDValue InputChain,
SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
SmallVector<SDNode*, 4> NowDeadNodes;
// Now that all the normal results are replaced, we replace the chain and
// glue results if present.
if (!ChainNodesMatched.empty()) {
assert(InputChain.getNode() &&
"Matched input chains but didn't produce a chain");
// Loop over all of the nodes we matched that produced a chain result.
// Replace all the chain results with the final chain we ended up with.
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
SDNode *ChainNode = ChainNodesMatched[i];
// If ChainNode is null, it's because we replaced it on a previous
// iteration and we cleared it out of the map. Just skip it.
if (!ChainNode)
continue;
assert(ChainNode->getOpcode() != ISD::DELETED_NODE &&
"Deleted node left in chain");
// Don't replace the results of the root node if we're doing a
// MorphNodeTo.
if (ChainNode == NodeToMatch && isMorphNodeTo)
continue;
SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
if (ChainVal.getValueType() == MVT::Glue)
ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
SelectionDAG::DAGNodeDeletedListener NDL(
*CurDAG, [&](SDNode *N, SDNode *E) {
std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N,
static_cast<SDNode *>(nullptr));
});
if (ChainNode->getOpcode() != ISD::TokenFactor)
ReplaceUses(ChainVal, InputChain);
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode != NodeToMatch && ChainNode->use_empty() &&
!llvm::is_contained(NowDeadNodes, ChainNode))
NowDeadNodes.push_back(ChainNode);
}
}
if (!NowDeadNodes.empty())
CurDAG->RemoveDeadNodes(NowDeadNodes);
LLVM_DEBUG(dbgs() << "ISEL: Match complete!\n");
}
/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
/// operation for when the pattern matched at least one node with a chain. The
/// input vector contains a list of all of the chained nodes that we matched.
/// We must determine whether this is a valid thing to cover (i.e. matching it
/// won't induce cycles in the DAG) and, if so, create a TokenFactor node that
/// will be used as the input chain for the generated nodes.
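/// For example, two matched nodes chained to distinct chains C1 and C2 are
/// merged under TokenFactor(C1, C2); if instead a candidate input chain is
/// itself a successor of one of the matched nodes, merging would create a
/// cycle, so a null SDValue is returned.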
static SDValue
HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
SelectionDAG *CurDAG) {
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 8> Worklist;
SmallVector<SDValue, 3> InputChains;
unsigned int Max = 8192;
// Quick exit on trivial merge.
if (ChainNodesMatched.size() == 1)
return ChainNodesMatched[0]->getOperand(0);
// Add chains that aren't already added (internal). Peek through
// token factors.
std::function<void(const SDValue)> AddChains = [&](const SDValue V) {
if (V.getValueType() != MVT::Other)
return;
if (V->getOpcode() == ISD::EntryToken)
return;
if (!Visited.insert(V.getNode()).second)
return;
if (V->getOpcode() == ISD::TokenFactor) {
for (const SDValue &Op : V->op_values())
AddChains(Op);
} else
InputChains.push_back(V);
};
for (auto *N : ChainNodesMatched) {
Worklist.push_back(N);
Visited.insert(N);
}
while (!Worklist.empty())
AddChains(Worklist.pop_back_val()->getOperand(0));
// Skip the search if there are no chain dependencies.
if (InputChains.size() == 0)
return CurDAG->getEntryNode();
// If one of these chains is a successor of input, we must have a
// node that is both the predecessor and successor of the
// to-be-merged nodes. Fail.
Visited.clear();
for (SDValue V : InputChains)
Worklist.push_back(V.getNode());
for (auto *N : ChainNodesMatched)
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
return SDValue();
// Return merged chain.
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
MVT::Other, InputChains);
}
/// MorphNode - Handle morphing a node in place for the selector.
SDNode *SelectionDAGISel::
MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
ArrayRef<SDValue> Ops, unsigned EmitNodeInfo) {
// It is possible we're using MorphNodeTo to replace a node with no
// normal results with one that has a normal result (or we could be
// adding a chain) and the input could have glue and chains as well.
// In this case we need to shift the operands down.
// FIXME: This is a horrible hack and broken in obscure cases, no worse
// than the old isel though.
int OldGlueResultNo = -1, OldChainResultNo = -1;
unsigned NTMNumResults = Node->getNumValues();
if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
OldGlueResultNo = NTMNumResults-1;
if (NTMNumResults != 1 &&
Node->getValueType(NTMNumResults-2) == MVT::Other)
OldChainResultNo = NTMNumResults-2;
} else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
OldChainResultNo = NTMNumResults-1;
// Call the underlying SelectionDAG routine to do the transmogrification. Note
// that this deletes operands of the old node that become dead.
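// The target opcode is passed bitwise-complemented because machine opcodes
// are stored as ~Opcode inside SDNodes, distinguishing them from
// target-independent ISD opcodes.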
SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops);
// MorphNodeTo can operate in two ways: if an existing node with the
// specified operands exists, it can just return it. Otherwise, it
// updates the node in place to have the requested operands.
if (Res == Node) {
// If we updated the node in place, reset the node ID. To the isel,
// this should be just like a newly allocated machine node.
Res->setNodeId(-1);
}
unsigned ResNumResults = Res->getNumValues();
// Move the glue if needed.
if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
static_cast<unsigned>(OldGlueResultNo) != ResNumResults - 1)
ReplaceUses(SDValue(Node, OldGlueResultNo),
SDValue(Res, ResNumResults - 1));
if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
--ResNumResults;
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
static_cast<unsigned>(OldChainResultNo) != ResNumResults - 1)
ReplaceUses(SDValue(Node, OldChainResultNo),
SDValue(Res, ResNumResults - 1));
// Otherwise MorphNodeTo returned an already-existing node; replace uses of
// the old node with the existing one.
if (Res != Node) {
ReplaceNode(Node, Res);
} else {
EnforceNodeIdInvariant(Res);
}
return Res;
}
/// CheckSame - Implements OP_CheckSame.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes) {
// Accept if it is exactly the same as a previously recorded node.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
return N == RecordedNodes[RecNo].first;
}
/// CheckChildSame - Implements OP_CheckChildXSame.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame(
const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes,
unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
return false; // Match fails if out of range child #.
return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
RecordedNodes);
}
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(unsigned Opcode, const unsigned char *MatcherTable,
unsigned &MatcherIndex, const SelectionDAGISel &SDISel) {
bool TwoBytePredNo =
Opcode == SelectionDAGISel::OPC_CheckPatternPredicateTwoByte;
unsigned PredNo =
TwoBytePredNo || Opcode == SelectionDAGISel::OPC_CheckPatternPredicate
? MatcherTable[MatcherIndex++]
: Opcode - SelectionDAGISel::OPC_CheckPatternPredicate0;
if (TwoBytePredNo)
PredNo |= MatcherTable[MatcherIndex++] << 8;
return SDISel.CheckPatternPredicate(PredNo);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckNodePredicate(unsigned Opcode, const unsigned char *MatcherTable,
unsigned &MatcherIndex, const SelectionDAGISel &SDISel,
SDNode *N) {
unsigned PredNo = Opcode == SelectionDAGISel::OPC_CheckPredicate
? MatcherTable[MatcherIndex++]
: Opcode - SelectionDAGISel::OPC_CheckPredicate0;
return SDISel.CheckNodePredicate(N, PredNo);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
Opc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
return N->getOpcode() == Opc;
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckType(MVT::SimpleValueType VT,
SDValue N,
const TargetLowering *TLI,
const DataLayout &DL) {
if (N.getValueType() == VT)
return true;
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildType(MVT::SimpleValueType VT, SDValue N, const TargetLowering *TLI,
const DataLayout &DL, unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
return false; // Match fails if out of range child #.
return ::CheckType(VT, N.getOperand(ChildNo), TLI, DL);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
static_cast<ISD::CondCode>(MatcherTable[MatcherIndex++]);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
if (2 >= N.getNumOperands())
return false;
return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2));
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT =
static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
if (cast<VTSDNode>(N)->getVT() == VT)
return true;
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
}
// Bit 0 stores the sign of the immediate. The upper bits contain the magnitude
// shifted left by 1.
static uint64_t decodeSignRotatedValue(uint64_t V) {
if ((V & 1) == 0)
return V >> 1;
if (V != 1)
return -(V >> 1);
// There is no such thing as -0 with integers. "-0" really means MININT.
return 1ULL << 63;
}
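// For example, 6 (0b110) decodes to 3, 7 (0b111) decodes to -3, and the
// special encoding 1 decodes to INT64_MIN (there is no -0).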
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
Val = decodeSignRotatedValue(Val);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
return C && C->getAPIntValue().trySExtValue() == Val;
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
return false; // Match fails if out of range child #.
return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
if (N->getOpcode() != ISD::AND) return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
if (N->getOpcode() != ISD::OR) return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
return C && SDISel.CheckOrMask(N.getOperand(0), C, Val);
}
/// IsPredicateKnownToFail - If we know how and can do so without pushing a
/// scope, evaluate the current node. If the current predicate is known to
/// fail, set Result=true and return anything. If the current predicate is
/// known to pass, set Result=false and return the MatcherIndex to continue
/// with. If the current predicate is unknown, set Result=false and return the
/// MatcherIndex to continue with.
static unsigned IsPredicateKnownToFail(const unsigned char *Table,
unsigned Index, SDValue N,
bool &Result,
const SelectionDAGISel &SDISel,
SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
unsigned Opcode = Table[Index++];
switch (Opcode) {
default:
Result = false;
return Index-1; // Could not evaluate this predicate.
case SelectionDAGISel::OPC_CheckSame:
Result = !::CheckSame(Table, Index, N, RecordedNodes);
return Index;
case SelectionDAGISel::OPC_CheckChild0Same:
case SelectionDAGISel::OPC_CheckChild1Same:
case SelectionDAGISel::OPC_CheckChild2Same:
case SelectionDAGISel::OPC_CheckChild3Same:
Result = !::CheckChildSame(Table, Index, N, RecordedNodes,
Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
return Index;
case SelectionDAGISel::OPC_CheckPatternPredicate:
case SelectionDAGISel::OPC_CheckPatternPredicate0:
case SelectionDAGISel::OPC_CheckPatternPredicate1:
case SelectionDAGISel::OPC_CheckPatternPredicate2:
case SelectionDAGISel::OPC_CheckPatternPredicate3:
case SelectionDAGISel::OPC_CheckPatternPredicate4:
case SelectionDAGISel::OPC_CheckPatternPredicate5:
case SelectionDAGISel::OPC_CheckPatternPredicate6:
case SelectionDAGISel::OPC_CheckPatternPredicate7:
case SelectionDAGISel::OPC_CheckPatternPredicateTwoByte:
Result = !::CheckPatternPredicate(Opcode, Table, Index, SDISel);
return Index;
case SelectionDAGISel::OPC_CheckPredicate:
case SelectionDAGISel::OPC_CheckPredicate0:
case SelectionDAGISel::OPC_CheckPredicate1:
case SelectionDAGISel::OPC_CheckPredicate2:
case SelectionDAGISel::OPC_CheckPredicate3:
case SelectionDAGISel::OPC_CheckPredicate4:
case SelectionDAGISel::OPC_CheckPredicate5:
case SelectionDAGISel::OPC_CheckPredicate6:
case SelectionDAGISel::OPC_CheckPredicate7:
Result = !::CheckNodePredicate(Opcode, Table, Index, SDISel, N.getNode());
return Index;
case SelectionDAGISel::OPC_CheckOpcode:
Result = !::CheckOpcode(Table, Index, N.getNode());
return Index;
case SelectionDAGISel::OPC_CheckType:
case SelectionDAGISel::OPC_CheckTypeI32:
case SelectionDAGISel::OPC_CheckTypeI64: {
MVT::SimpleValueType VT;
switch (Opcode) {
case SelectionDAGISel::OPC_CheckTypeI32:
VT = MVT::i32;
break;
case SelectionDAGISel::OPC_CheckTypeI64:
VT = MVT::i64;
break;
default:
VT = static_cast<MVT::SimpleValueType>(Table[Index++]);
break;
}
Result = !::CheckType(VT, N, SDISel.TLI, SDISel.CurDAG->getDataLayout());
return Index;
}
case SelectionDAGISel::OPC_CheckTypeRes: {
unsigned Res = Table[Index++];
Result = !::CheckType(static_cast<MVT::SimpleValueType>(Table[Index++]),
N.getValue(Res), SDISel.TLI,
SDISel.CurDAG->getDataLayout());
return Index;
}
case SelectionDAGISel::OPC_CheckChild0Type:
case SelectionDAGISel::OPC_CheckChild1Type:
case SelectionDAGISel::OPC_CheckChild2Type:
case SelectionDAGISel::OPC_CheckChild3Type:
case SelectionDAGISel::OPC_CheckChild4Type:
case SelectionDAGISel::OPC_CheckChild5Type:
case SelectionDAGISel::OPC_CheckChild6Type:
case SelectionDAGISel::OPC_CheckChild7Type:
case SelectionDAGISel::OPC_CheckChild0TypeI32:
case SelectionDAGISel::OPC_CheckChild1TypeI32:
case SelectionDAGISel::OPC_CheckChild2TypeI32:
case SelectionDAGISel::OPC_CheckChild3TypeI32:
case SelectionDAGISel::OPC_CheckChild4TypeI32:
case SelectionDAGISel::OPC_CheckChild5TypeI32:
case SelectionDAGISel::OPC_CheckChild6TypeI32:
case SelectionDAGISel::OPC_CheckChild7TypeI32:
case SelectionDAGISel::OPC_CheckChild0TypeI64:
case SelectionDAGISel::OPC_CheckChild1TypeI64:
case SelectionDAGISel::OPC_CheckChild2TypeI64:
case SelectionDAGISel::OPC_CheckChild3TypeI64:
case SelectionDAGISel::OPC_CheckChild4TypeI64:
case SelectionDAGISel::OPC_CheckChild5TypeI64:
case SelectionDAGISel::OPC_CheckChild6TypeI64:
case SelectionDAGISel::OPC_CheckChild7TypeI64: {
MVT::SimpleValueType VT;
unsigned ChildNo;
if (Opcode >= SelectionDAGISel::OPC_CheckChild0TypeI32 &&
Opcode <= SelectionDAGISel::OPC_CheckChild7TypeI32) {
VT = MVT::i32;
ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0TypeI32;
} else if (Opcode >= SelectionDAGISel::OPC_CheckChild0TypeI64 &&
Opcode <= SelectionDAGISel::OPC_CheckChild7TypeI64) {
VT = MVT::i64;
ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0TypeI64;
} else {
VT = static_cast<MVT::SimpleValueType>(Table[Index++]);
ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0Type;
}
Result = !::CheckChildType(VT, N, SDISel.TLI,
SDISel.CurDAG->getDataLayout(), ChildNo);
return Index;
}
case SelectionDAGISel::OPC_CheckCondCode:
Result = !::CheckCondCode(Table, Index, N);
return Index;
case SelectionDAGISel::OPC_CheckChild2CondCode:
Result = !::CheckChild2CondCode(Table, Index, N);
return Index;
case SelectionDAGISel::OPC_CheckValueType:
Result = !::CheckValueType(Table, Index, N, SDISel.TLI,
SDISel.CurDAG->getDataLayout());
return Index;
case SelectionDAGISel::OPC_CheckInteger:
Result = !::CheckInteger(Table, Index, N);
return Index;
case SelectionDAGISel::OPC_CheckChild0Integer:
case SelectionDAGISel::OPC_CheckChild1Integer:
case SelectionDAGISel::OPC_CheckChild2Integer:
case SelectionDAGISel::OPC_CheckChild3Integer:
case SelectionDAGISel::OPC_CheckChild4Integer:
Result = !::CheckChildInteger(Table, Index, N,
Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer);
return Index;
case SelectionDAGISel::OPC_CheckAndImm:
Result = !::CheckAndImm(Table, Index, N, SDISel);
return Index;
case SelectionDAGISel::OPC_CheckOrImm:
Result = !::CheckOrImm(Table, Index, N, SDISel);
return Index;
}
}
namespace {
struct MatchScope {
/// FailIndex - If this match fails, this is the index to continue with.
unsigned FailIndex;
/// NodeStack - The node stack when the scope was formed.
SmallVector<SDValue, 4> NodeStack;
/// NumRecordedNodes - The number of recorded nodes when the scope was formed.
unsigned NumRecordedNodes;
/// NumMatchedMemRefs - The number of matched memref entries.
unsigned NumMatchedMemRefs;
/// InputChain/InputGlue - The current chain/glue
SDValue InputChain, InputGlue;
/// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
bool HasChainNodesMatched;
};
/// A DAG update listener to keep the matching state
/// (i.e. RecordedNodes and MatchScope) up to date if the target is allowed to
/// change the DAG while matching. The X86 addressing mode matcher is an
/// example of this.
class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
{
SDNode **NodeToMatch;
SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes;
SmallVectorImpl<MatchScope> &MatchScopes;
public:
MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch,
SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN,
SmallVectorImpl<MatchScope> &MS)
: SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch),
RecordedNodes(RN), MatchScopes(MS) {}
void NodeDeleted(SDNode *N, SDNode *E) override {
// Some early-returns here to avoid the search if we deleted the node or
// if the update comes from MorphNodeTo (MorphNodeTo is the last thing we
// do, so it's unnecessary to update matching state at that point).
// Neither of these can occur currently because we only install this
// update listener while matching a complex pattern.
if (!E || E->isMachineOpcode())
return;
// Check if NodeToMatch was updated.
if (N == *NodeToMatch)
*NodeToMatch = E;
// Performing linear search here does not matter because we almost never
// run this code. You'd have to have a CSE during complex pattern
// matching.
for (auto &I : RecordedNodes)
if (I.first.getNode() == N)
I.first.setNode(E);
for (auto &I : MatchScopes)
for (auto &J : I.NodeStack)
if (J.getNode() == N)
J.setNode(E);
}
};
} // end anonymous namespace
void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
const unsigned char *MatcherTable,
unsigned TableSize) {
// FIXME: Should these even be selected? Handle these cases in the caller?
switch (NodeToMatch->getOpcode()) {
default:
break;
case ISD::EntryToken: // These nodes remain the same.
case ISD::BasicBlock:
case ISD::Register:
case ISD::RegisterMask:
case ISD::HANDLENODE:
case ISD::MDNODE_SDNODE:
case ISD::TargetConstant:
case ISD::TargetConstantFP:
case ISD::TargetConstantPool:
case ISD::TargetFrameIndex:
case ISD::TargetExternalSymbol:
case ISD::MCSymbol:
case ISD::TargetBlockAddress:
case ISD::TargetJumpTable:
case ISD::TargetGlobalTLSAddress:
case ISD::TargetGlobalAddress:
case ISD::TokenFactor:
case ISD::CopyFromReg:
case ISD::CopyToReg:
case ISD::EH_LABEL:
case ISD::ANNOTATION_LABEL:
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
case ISD::PSEUDO_PROBE:
NodeToMatch->setNodeId(-1); // Mark selected.
return;
case ISD::AssertSext:
case ISD::AssertZext:
case ISD::AssertAlign:
ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0));
CurDAG->RemoveDeadNode(NodeToMatch);
return;
case ISD::INLINEASM:
case ISD::INLINEASM_BR:
Select_INLINEASM(NodeToMatch);
return;
case ISD::READ_REGISTER:
Select_READ_REGISTER(NodeToMatch);
return;
case ISD::WRITE_REGISTER:
Select_WRITE_REGISTER(NodeToMatch);
return;
case ISD::UNDEF:
Select_UNDEF(NodeToMatch);
return;
case ISD::FREEZE:
Select_FREEZE(NodeToMatch);
return;
case ISD::ARITH_FENCE:
Select_ARITH_FENCE(NodeToMatch);
return;
case ISD::MEMBARRIER:
Select_MEMBARRIER(NodeToMatch);
return;
case ISD::STACKMAP:
Select_STACKMAP(NodeToMatch);
return;
case ISD::PATCHPOINT:
Select_PATCHPOINT(NodeToMatch);
return;
case ISD::JUMP_TABLE_DEBUG_INFO:
Select_JUMP_TABLE_DEBUG_INFO(NodeToMatch);
return;
case ISD::CONVERGENCECTRL_ANCHOR:
Select_CONVERGENCECTRL_ANCHOR(NodeToMatch);
return;
case ISD::CONVERGENCECTRL_ENTRY:
Select_CONVERGENCECTRL_ENTRY(NodeToMatch);
return;
case ISD::CONVERGENCECTRL_LOOP:
Select_CONVERGENCECTRL_LOOP(NodeToMatch);
return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
// Set up the node stack with NodeToMatch as the only node on the stack.
SmallVector<SDValue, 8> NodeStack;
SDValue N = SDValue(NodeToMatch, 0);
NodeStack.push_back(N);
// MatchScopes - Scopes used when matching, if a match failure happens, this
// indicates where to continue checking.
SmallVector<MatchScope, 8> MatchScopes;
// RecordedNodes - This is the set of nodes that have been recorded by the
// state machine. The second value is the parent of the node, or null if the
// root is recorded.
SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
// MatchedMemRefs - This is the set of MemRef's we've seen in the input
// pattern.
SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
// These are the current input chain and glue for use when generating nodes.
// Various Emit operations change these. For example, emitting a copytoreg
// uses and updates these.
SDValue InputChain, InputGlue;
// ChainNodesMatched - If a pattern matches nodes that have input/output
// chains, the OPC_EmitMergeInputChains operation is emitted which indicates
// which ones they are. The result is captured into this list so that we can
// update the chain results when the pattern is complete.
SmallVector<SDNode*, 3> ChainNodesMatched;
LLVM_DEBUG(dbgs() << "ISEL: Starting pattern match\n");
// Determine where to start the interpreter. Normally we start at opcode #0,
// but if the state machine starts with an OPC_SwitchOpcode, then we
// accelerate the first lookup (which is guaranteed to be hot) with the
// OpcodeOffset table.
unsigned MatcherIndex = 0;
if (!OpcodeOffset.empty()) {
// Already computed the OpcodeOffset table, just index into it.
if (N.getOpcode() < OpcodeOffset.size())
MatcherIndex = OpcodeOffset[N.getOpcode()];
LLVM_DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
} else if (MatcherTable[0] == OPC_SwitchOpcode) {
// Otherwise, the table isn't computed, but the state machine does start
// with an OPC_SwitchOpcode instruction. Populate the table now, since this
// is the first time we're selecting an instruction.
unsigned Idx = 1;
while (true) {
// Get the size of this case.
unsigned CaseSize = MatcherTable[Idx++];
if (CaseSize & 128)
CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
if (CaseSize == 0) break;
// Get the opcode, add the index to the table.
uint16_t Opc = MatcherTable[Idx++];
Opc |= static_cast<uint16_t>(MatcherTable[Idx++]) << 8;
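// Grow the table to twice the needed size so that a run of increasing
// opcodes doesn't force a resize on every case.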
if (Opc >= OpcodeOffset.size())
OpcodeOffset.resize((Opc+1)*2);
OpcodeOffset[Opc] = Idx;
Idx += CaseSize;
}
// Okay, do the lookup for the first opcode.
if (N.getOpcode() < OpcodeOffset.size())
MatcherIndex = OpcodeOffset[N.getOpcode()];
}
while (true) {
assert(MatcherIndex < TableSize && "Invalid index");
#ifndef NDEBUG
unsigned CurrentOpcodeIndex = MatcherIndex;
#endif
BuiltinOpcodes Opcode =
static_cast<BuiltinOpcodes>(MatcherTable[MatcherIndex++]);
switch (Opcode) {
case OPC_Scope: {
// Okay, the semantics of this operation are that we should push a scope
// then evaluate the first child. However, pushing a scope only to have
// the first check fail (which then pops it) is inefficient. If we can
// determine immediately that the first check (or first several) will
// immediately fail, don't even bother pushing a scope for them.
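// Each scope child is encoded as a (possibly VBR) NumToSkip count followed
// by its matcher ops; a count of zero terminates the child list.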
unsigned FailIndex;
while (true) {
unsigned NumToSkip = MatcherTable[MatcherIndex++];
if (NumToSkip & 128)
NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
// Found the end of the scope with no match.
if (NumToSkip == 0) {
FailIndex = 0;
break;
}
FailIndex = MatcherIndex+NumToSkip;
unsigned MatcherIndexOfPredicate = MatcherIndex;
(void)MatcherIndexOfPredicate; // silence warning.
// If we can't evaluate this predicate without pushing a scope (e.g. if
// it is a 'MoveParent') or if the predicate succeeds on this node, we
// push the scope and evaluate the full predicate chain.
bool Result;
MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
Result, *this, RecordedNodes);
if (!Result)
break;
LLVM_DEBUG(
dbgs() << " Skipped scope entry (due to false predicate) at "
<< "index " << MatcherIndexOfPredicate << ", continuing at "
<< FailIndex << "\n");
++NumDAGIselRetries;
// Otherwise, we know that this case of the Scope is guaranteed to fail;
// move on to the next case.
MatcherIndex = FailIndex;
}
// If the whole scope failed to match, bail.
if (FailIndex == 0) break;
// Push a MatchScope which indicates where to go if the first child fails
// to match.
MatchScope NewEntry;
NewEntry.FailIndex = FailIndex;
NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
NewEntry.NumRecordedNodes = RecordedNodes.size();
NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
NewEntry.InputChain = InputChain;
NewEntry.InputGlue = InputGlue;
NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
MatchScopes.push_back(NewEntry);
continue;
}
case OPC_RecordNode: {
// Remember this node, it may end up being an operand in the pattern.
SDNode *Parent = nullptr;
if (NodeStack.size() > 1)
Parent = NodeStack[NodeStack.size()-2].getNode();
RecordedNodes.push_back(std::make_pair(N, Parent));
continue;
}
case OPC_RecordChild0: case OPC_RecordChild1:
case OPC_RecordChild2: case OPC_RecordChild3:
case OPC_RecordChild4: case OPC_RecordChild5:
case OPC_RecordChild6: case OPC_RecordChild7: {
unsigned ChildNo = Opcode-OPC_RecordChild0;
if (ChildNo >= N.getNumOperands())
break; // Match fails if out of range child #.
RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
N.getNode()));
continue;
}
case OPC_RecordMemRef:
if (auto *MN = dyn_cast<MemSDNode>(N))
MatchedMemRefs.push_back(MN->getMemOperand());
else {
LLVM_DEBUG(dbgs() << "Expected MemSDNode "; N->dump(CurDAG);
dbgs() << '\n');
}
continue;
case OPC_CaptureGlueInput:
// If the current node has an input glue, capture it in InputGlue.
if (N->getNumOperands() != 0 &&
N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
InputGlue = N->getOperand(N->getNumOperands()-1);
continue;
case OPC_MoveChild: {
unsigned ChildNo = MatcherTable[MatcherIndex++];
if (ChildNo >= N.getNumOperands())
break; // Match fails if out of range child #.
N = N.getOperand(ChildNo);
NodeStack.push_back(N);
continue;
}
case OPC_MoveChild0: case OPC_MoveChild1:
case OPC_MoveChild2: case OPC_MoveChild3:
case OPC_MoveChild4: case OPC_MoveChild5:
case OPC_MoveChild6: case OPC_MoveChild7: {
unsigned ChildNo = Opcode-OPC_MoveChild0;
if (ChildNo >= N.getNumOperands())
break; // Match fails if out of range child #.
N = N.getOperand(ChildNo);
NodeStack.push_back(N);
continue;
}
case OPC_MoveSibling:
case OPC_MoveSibling0:
case OPC_MoveSibling1:
case OPC_MoveSibling2:
case OPC_MoveSibling3:
case OPC_MoveSibling4:
case OPC_MoveSibling5:
case OPC_MoveSibling6:
case OPC_MoveSibling7: {
// Pop the current node off the NodeStack.
NodeStack.pop_back();
assert(!NodeStack.empty() && "Node stack imbalance!");
N = NodeStack.back();
unsigned SiblingNo = Opcode == OPC_MoveSibling
? MatcherTable[MatcherIndex++]
: Opcode - OPC_MoveSibling0;
if (SiblingNo >= N.getNumOperands())
break; // Match fails if out of range sibling #.
N = N.getOperand(SiblingNo);
NodeStack.push_back(N);
continue;
}
case OPC_MoveParent:
// Pop the current node off the NodeStack.
NodeStack.pop_back();
assert(!NodeStack.empty() && "Node stack imbalance!");
N = NodeStack.back();
continue;
case OPC_CheckSame:
if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
continue;
case OPC_CheckChild0Same: case OPC_CheckChild1Same:
case OPC_CheckChild2Same: case OPC_CheckChild3Same:
if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes,
Opcode-OPC_CheckChild0Same))
break;
continue;
case OPC_CheckPatternPredicate:
case OPC_CheckPatternPredicate0:
case OPC_CheckPatternPredicate1:
case OPC_CheckPatternPredicate2:
case OPC_CheckPatternPredicate3:
case OPC_CheckPatternPredicate4:
case OPC_CheckPatternPredicate5:
case OPC_CheckPatternPredicate6:
case OPC_CheckPatternPredicate7:
case OPC_CheckPatternPredicateTwoByte:
if (!::CheckPatternPredicate(Opcode, MatcherTable, MatcherIndex, *this))
break;
continue;
case SelectionDAGISel::OPC_CheckPredicate0:
case SelectionDAGISel::OPC_CheckPredicate1:
case SelectionDAGISel::OPC_CheckPredicate2:
case SelectionDAGISel::OPC_CheckPredicate3:
case SelectionDAGISel::OPC_CheckPredicate4:
case SelectionDAGISel::OPC_CheckPredicate5:
case SelectionDAGISel::OPC_CheckPredicate6:
case SelectionDAGISel::OPC_CheckPredicate7:
case OPC_CheckPredicate:
if (!::CheckNodePredicate(Opcode, MatcherTable, MatcherIndex, *this,
N.getNode()))
break;
continue;
case OPC_CheckPredicateWithOperands: {
unsigned OpNum = MatcherTable[MatcherIndex++];
SmallVector<SDValue, 8> Operands;
for (unsigned i = 0; i < OpNum; ++i)
Operands.push_back(RecordedNodes[MatcherTable[MatcherIndex++]].first);
unsigned PredNo = MatcherTable[MatcherIndex++];
if (!CheckNodePredicateWithOperands(N.getNode(), PredNo, Operands))
break;
continue;
}
case OPC_CheckComplexPat:
case OPC_CheckComplexPat0:
case OPC_CheckComplexPat1:
case OPC_CheckComplexPat2:
case OPC_CheckComplexPat3:
case OPC_CheckComplexPat4:
case OPC_CheckComplexPat5:
case OPC_CheckComplexPat6:
case OPC_CheckComplexPat7: {
unsigned CPNum = Opcode == OPC_CheckComplexPat
? MatcherTable[MatcherIndex++]
: Opcode - OPC_CheckComplexPat0;
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
// If target can modify DAG during matching, keep the matching state
// consistent.
std::unique_ptr<MatchStateUpdater> MSU;
if (ComplexPatternFuncMutatesDAG())
MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes,
MatchScopes));
if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
RecordedNodes[RecNo].first, CPNum,
RecordedNodes))
break;
continue;
}
case OPC_CheckOpcode:
if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
continue;
case OPC_CheckType:
case OPC_CheckTypeI32:
case OPC_CheckTypeI64:
MVT::SimpleValueType VT;
switch (Opcode) {
case OPC_CheckTypeI32:
VT = MVT::i32;
break;
case OPC_CheckTypeI64:
VT = MVT::i64;
break;
default:
VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
break;
}
if (!::CheckType(VT, N, TLI, CurDAG->getDataLayout()))
break;
continue;
case OPC_CheckTypeRes: {
unsigned Res = MatcherTable[MatcherIndex++];
if (!::CheckType(
static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]),
N.getValue(Res), TLI, CurDAG->getDataLayout()))
break;
continue;
}
case OPC_SwitchOpcode: {
unsigned CurNodeOpcode = N.getOpcode();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
while (true) {
// Get the size of this case.
CaseSize = MatcherTable[MatcherIndex++];
if (CaseSize & 128)
CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
if (CaseSize == 0) break;
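// Node opcodes are stored as two bytes in the table, low byte first.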
uint16_t Opc = MatcherTable[MatcherIndex++];
Opc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
// If the opcode matches, then we will execute this case.
if (CurNodeOpcode == Opc)
break;
// Otherwise, skip over this case.
MatcherIndex += CaseSize;
}
// If no cases matched, bail out.
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
LLVM_DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to "
<< MatcherIndex << "\n");
continue;
}
case OPC_SwitchType: {
MVT CurNodeVT = N.getSimpleValueType();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
while (true) {
// Get the size of this case.
CaseSize = MatcherTable[MatcherIndex++];
if (CaseSize & 128)
CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
if (CaseSize == 0) break;
MVT CaseVT =
static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
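// iPTR in the table is a placeholder that stands for whatever the target's
// pointer type actually is.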
if (CaseVT == MVT::iPTR)
CaseVT = TLI->getPointerTy(CurDAG->getDataLayout());
// If the VT matches, then we will execute this case.
if (CurNodeVT == CaseVT)
break;
// Otherwise, skip over this case.
MatcherIndex += CaseSize;
}
// If no cases matched, bail out.
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
LLVM_DEBUG(dbgs() << " TypeSwitch[" << CurNodeVT
<< "] from " << SwitchStart << " to " << MatcherIndex
<< '\n');
continue;
}
case OPC_CheckChild0Type:
case OPC_CheckChild1Type:
case OPC_CheckChild2Type:
case OPC_CheckChild3Type:
case OPC_CheckChild4Type:
case OPC_CheckChild5Type:
case OPC_CheckChild6Type:
case OPC_CheckChild7Type:
case OPC_CheckChild0TypeI32:
case OPC_CheckChild1TypeI32:
case OPC_CheckChild2TypeI32:
case OPC_CheckChild3TypeI32:
case OPC_CheckChild4TypeI32:
case OPC_CheckChild5TypeI32:
case OPC_CheckChild6TypeI32:
case OPC_CheckChild7TypeI32:
case OPC_CheckChild0TypeI64:
case OPC_CheckChild1TypeI64:
case OPC_CheckChild2TypeI64:
case OPC_CheckChild3TypeI64:
case OPC_CheckChild4TypeI64:
case OPC_CheckChild5TypeI64:
case OPC_CheckChild6TypeI64:
case OPC_CheckChild7TypeI64: {
MVT::SimpleValueType VT;
unsigned ChildNo;
if (Opcode >= SelectionDAGISel::OPC_CheckChild0TypeI32 &&
Opcode <= SelectionDAGISel::OPC_CheckChild7TypeI32) {
VT = MVT::i32;
ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0TypeI32;
} else if (Opcode >= SelectionDAGISel::OPC_CheckChild0TypeI64 &&
Opcode <= SelectionDAGISel::OPC_CheckChild7TypeI64) {
VT = MVT::i64;
ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0TypeI64;
} else {
VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
ChildNo = Opcode - SelectionDAGISel::OPC_CheckChild0Type;
}
if (!::CheckChildType(VT, N, TLI, CurDAG->getDataLayout(), ChildNo))
break;
continue;
}
case OPC_CheckCondCode:
if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
continue;
case OPC_CheckChild2CondCode:
if (!::CheckChild2CondCode(MatcherTable, MatcherIndex, N)) break;
continue;
case OPC_CheckValueType:
if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI,
CurDAG->getDataLayout()))
break;
continue;
case OPC_CheckInteger:
if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
continue;
case OPC_CheckChild0Integer: case OPC_CheckChild1Integer:
case OPC_CheckChild2Integer: case OPC_CheckChild3Integer:
case OPC_CheckChild4Integer:
if (!::CheckChildInteger(MatcherTable, MatcherIndex, N,
Opcode-OPC_CheckChild0Integer)) break;
continue;
case OPC_CheckAndImm:
if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
case OPC_CheckOrImm:
if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
case OPC_CheckImmAllOnesV:
if (!ISD::isConstantSplatVectorAllOnes(N.getNode()))
break;
continue;
case OPC_CheckImmAllZerosV:
if (!ISD::isConstantSplatVectorAllZeros(N.getNode()))
break;
continue;
case OPC_CheckFoldableChainNode: {
assert(NodeStack.size() != 1 && "No parent node");
// Verify that all intermediate nodes between the root and this one have
// a single use (ignoring chains, which are handled in UpdateChains).
bool HasMultipleUses = false;
for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) {
unsigned NNonChainUses = 0;
SDNode *NS = NodeStack[i].getNode();
for (auto UI = NS->use_begin(), UE = NS->use_end(); UI != UE; ++UI)
if (UI.getUse().getValueType() != MVT::Other)
if (++NNonChainUses > 1) {
HasMultipleUses = true;
break;
}
if (HasMultipleUses) break;
}
if (HasMultipleUses) break;
// Check to see that the target thinks this is profitable to fold and that
// we can fold it without inducing cycles in the graph.
if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
NodeToMatch) ||
!IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
NodeToMatch, OptLevel,
true/*We validate our own chains*/))
break;
continue;
}
case OPC_EmitInteger:
case OPC_EmitInteger8:
case OPC_EmitInteger16:
case OPC_EmitInteger32:
case OPC_EmitInteger64:
case OPC_EmitStringInteger:
case OPC_EmitStringInteger32: {
MVT::SimpleValueType VT;
switch (Opcode) {
case OPC_EmitInteger8:
VT = MVT::i8;
break;
case OPC_EmitInteger16:
VT = MVT::i16;
break;
case OPC_EmitInteger32:
case OPC_EmitStringInteger32:
VT = MVT::i32;
break;
case OPC_EmitInteger64:
VT = MVT::i64;
break;
default:
VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
break;
}
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
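// Only the plain integer forms use the sign-rotated encoding; the
// OPC_EmitStringInteger forms store the value directly.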
if (Opcode >= OPC_EmitInteger && Opcode <= OPC_EmitInteger64)
Val = decodeSignRotatedValue(Val);
RecordedNodes.push_back(std::pair<SDValue, SDNode *>(
CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), VT), nullptr));
continue;
}
case OPC_EmitRegister:
case OPC_EmitRegisterI32:
case OPC_EmitRegisterI64: {
MVT::SimpleValueType VT;
switch (Opcode) {
case OPC_EmitRegisterI32:
VT = MVT::i32;
break;
case OPC_EmitRegisterI64:
VT = MVT::i64;
break;
default:
VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
break;
}
unsigned RegNo = MatcherTable[MatcherIndex++];
RecordedNodes.push_back(std::pair<SDValue, SDNode *>(
CurDAG->getRegister(RegNo, VT), nullptr));
continue;
}
case OPC_EmitRegister2: {
// For targets with more than 256 register names, the register enum
// values are stored in two bytes in the matcher table (just like
// opcodes).
MVT::SimpleValueType VT =
static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
unsigned RegNo = MatcherTable[MatcherIndex++];
RegNo |= MatcherTable[MatcherIndex++] << 8;
RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
CurDAG->getRegister(RegNo, VT), nullptr));
continue;
}
case OPC_EmitConvertToTarget:
case OPC_EmitConvertToTarget0:
case OPC_EmitConvertToTarget1:
case OPC_EmitConvertToTarget2:
case OPC_EmitConvertToTarget3:
case OPC_EmitConvertToTarget4:
case OPC_EmitConvertToTarget5:
case OPC_EmitConvertToTarget6:
case OPC_EmitConvertToTarget7: {
// Convert from IMM/FPIMM to target version.
unsigned RecNo = Opcode == OPC_EmitConvertToTarget
? MatcherTable[MatcherIndex++]
: Opcode - OPC_EmitConvertToTarget0;
assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
SDValue Imm = RecordedNodes[RecNo].first;
if (Imm->getOpcode() == ISD::Constant) {
const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
Imm = CurDAG->getTargetConstant(*Val, SDLoc(NodeToMatch),
Imm.getValueType());
} else if (Imm->getOpcode() == ISD::ConstantFP) {
const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
Imm = CurDAG->getTargetConstantFP(*Val, SDLoc(NodeToMatch),
Imm.getValueType());
}
RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
continue;
}
case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
case OPC_EmitMergeInputChains1_1: // OPC_EmitMergeInputChains, 1, 1
case OPC_EmitMergeInputChains1_2: { // OPC_EmitMergeInputChains, 1, 2
// These are space-optimized forms of OPC_EmitMergeInputChains.
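// Each handles exactly one chain node, with the record number folded into
// the opcode byte, so no operand bytes need to be read.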
assert(!InputChain.getNode() &&
"EmitMergeInputChains should be the first chain producing node");
assert(ChainNodesMatched.empty() &&
"Should only have one EmitMergeInputChains per match");
// Read all of the chained nodes.
unsigned RecNo = Opcode - OPC_EmitMergeInputChains1_0;
assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
// If the chained node is not the root, we can't fold it if it has
// multiple uses.
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
!RecordedNodes[RecNo].first.hasOneUse()) {
ChainNodesMatched.clear();
break;
}
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
if (!InputChain.getNode())
break; // Failed to merge.
continue;
}
case OPC_EmitMergeInputChains: {
assert(!InputChain.getNode() &&
"EmitMergeInputChains should be the first chain producing node");
// This node gets a list of nodes we matched in the input that have
// chains. We want to token factor all of the input chains to these nodes
// together. However, if any of the input chains is actually one of the
// nodes matched in this pattern, then we have an intra-match reference.
// Ignore these because the newly token factored chain should not refer to
// the old nodes.
unsigned NumChains = MatcherTable[MatcherIndex++];
assert(NumChains != 0 && "Can't TF zero chains");
assert(ChainNodesMatched.empty() &&
"Should only have one EmitMergeInputChains per match");
// Read all of the chained nodes.
for (unsigned i = 0; i != NumChains; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
// If the chained node is not the root, we can't fold it if it has
// multiple uses.
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
!RecordedNodes[RecNo].first.hasOneUse()) {
ChainNodesMatched.clear();
break;
}
}
// If the inner loop broke out, the match fails.
if (ChainNodesMatched.empty())
break;
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
if (!InputChain.getNode())
break; // Failed to merge.
continue;
}
case OPC_EmitCopyToReg:
case OPC_EmitCopyToReg0:
case OPC_EmitCopyToReg1:
case OPC_EmitCopyToReg2:
case OPC_EmitCopyToReg3:
case OPC_EmitCopyToReg4:
case OPC_EmitCopyToReg5:
case OPC_EmitCopyToReg6:
case OPC_EmitCopyToReg7:
case OPC_EmitCopyToRegTwoByte: {
unsigned RecNo =
Opcode >= OPC_EmitCopyToReg0 && Opcode <= OPC_EmitCopyToReg7
? Opcode - OPC_EmitCopyToReg0
: MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
if (Opcode == OPC_EmitCopyToRegTwoByte)
DestPhysReg |= MatcherTable[MatcherIndex++] << 8;
if (!InputChain.getNode())
InputChain = CurDAG->getEntryNode();
InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch),
DestPhysReg, RecordedNodes[RecNo].first,
InputGlue);
InputGlue = InputChain.getValue(1);
continue;
}
case OPC_EmitNodeXForm: {
unsigned XFormNo = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr));
continue;
}
case OPC_Coverage: {
// This is emitted right before MorphNode/EmitNode, so it should be safe
// to assume that this node has been selected.
unsigned index = MatcherTable[MatcherIndex++];
index |= (MatcherTable[MatcherIndex++] << 8);
dbgs() << "COVERED: " << getPatternForIndex(index) << "\n";
dbgs() << "INCLUDED: " << getIncludePathForIndex(index) << "\n";
continue;
}
case OPC_EmitNode:
case OPC_EmitNode0:
case OPC_EmitNode1:
case OPC_EmitNode2:
case OPC_EmitNode0None:
case OPC_EmitNode1None:
case OPC_EmitNode2None:
case OPC_EmitNode0Chain:
case OPC_EmitNode1Chain:
case OPC_EmitNode2Chain:
case OPC_MorphNodeTo:
case OPC_MorphNodeTo0:
case OPC_MorphNodeTo1:
case OPC_MorphNodeTo2:
case OPC_MorphNodeTo0None:
case OPC_MorphNodeTo1None:
case OPC_MorphNodeTo2None:
case OPC_MorphNodeTo0Chain:
case OPC_MorphNodeTo1Chain:
case OPC_MorphNodeTo2Chain:
case OPC_MorphNodeTo0GlueInput:
case OPC_MorphNodeTo1GlueInput:
case OPC_MorphNodeTo2GlueInput:
case OPC_MorphNodeTo0GlueOutput:
case OPC_MorphNodeTo1GlueOutput:
case OPC_MorphNodeTo2GlueOutput: {
uint16_t TargetOpc = MatcherTable[MatcherIndex++];
TargetOpc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8;
unsigned EmitNodeInfo;
if (Opcode >= OPC_EmitNode0None && Opcode <= OPC_EmitNode2Chain) {
if (Opcode >= OPC_EmitNode0Chain && Opcode <= OPC_EmitNode2Chain)
EmitNodeInfo = OPFL_Chain;
else
EmitNodeInfo = OPFL_None;
} else if (Opcode >= OPC_MorphNodeTo0None &&
Opcode <= OPC_MorphNodeTo2GlueOutput) {
if (Opcode >= OPC_MorphNodeTo0Chain && Opcode <= OPC_MorphNodeTo2Chain)
EmitNodeInfo = OPFL_Chain;
else if (Opcode >= OPC_MorphNodeTo0GlueInput &&
Opcode <= OPC_MorphNodeTo2GlueInput)
EmitNodeInfo = OPFL_GlueInput;
else if (Opcode >= OPC_MorphNodeTo0GlueOutput &&
Opcode <= OPC_MorphNodeTo2GlueOutput)
EmitNodeInfo = OPFL_GlueOutput;
else
EmitNodeInfo = OPFL_None;
} else
EmitNodeInfo = MatcherTable[MatcherIndex++];
// Get the result VT list.
unsigned NumVTs;
// If this is one of the compressed forms, get the number of VTs based
// on the Opcode. Otherwise read the next byte from the table.
if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2)
NumVTs = Opcode - OPC_MorphNodeTo0;
else if (Opcode >= OPC_MorphNodeTo0None && Opcode <= OPC_MorphNodeTo2None)
NumVTs = Opcode - OPC_MorphNodeTo0None;
else if (Opcode >= OPC_MorphNodeTo0Chain &&
Opcode <= OPC_MorphNodeTo2Chain)
NumVTs = Opcode - OPC_MorphNodeTo0Chain;
else if (Opcode >= OPC_MorphNodeTo0GlueInput &&
Opcode <= OPC_MorphNodeTo2GlueInput)
NumVTs = Opcode - OPC_MorphNodeTo0GlueInput;
else if (Opcode >= OPC_MorphNodeTo0GlueOutput &&
Opcode <= OPC_MorphNodeTo2GlueOutput)
NumVTs = Opcode - OPC_MorphNodeTo0GlueOutput;
else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2)
NumVTs = Opcode - OPC_EmitNode0;
else if (Opcode >= OPC_EmitNode0None && Opcode <= OPC_EmitNode2None)
NumVTs = Opcode - OPC_EmitNode0None;
else if (Opcode >= OPC_EmitNode0Chain && Opcode <= OPC_EmitNode2Chain)
NumVTs = Opcode - OPC_EmitNode0Chain;
else
NumVTs = MatcherTable[MatcherIndex++];
SmallVector<EVT, 4> VTs;
for (unsigned i = 0; i != NumVTs; ++i) {
MVT::SimpleValueType VT =
static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]);
if (VT == MVT::iPTR)
VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy;
VTs.push_back(VT);
}
if (EmitNodeInfo & OPFL_Chain)
VTs.push_back(MVT::Other);
if (EmitNodeInfo & OPFL_GlueOutput)
VTs.push_back(MVT::Glue);
// This is hot code, so optimize the two most common cases of 1 and 2
// results.
SDVTList VTList;
if (VTs.size() == 1)
VTList = CurDAG->getVTList(VTs[0]);
else if (VTs.size() == 2)
VTList = CurDAG->getVTList(VTs[0], VTs[1]);
else
VTList = CurDAG->getVTList(VTs);
// Get the operand list.
unsigned NumOps = MatcherTable[MatcherIndex++];
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumOps; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
Ops.push_back(RecordedNodes[RecNo].first);
}
// If there are variadic operands to add, handle them now.
if (EmitNodeInfo & OPFL_VariadicInfo) {
// Determine the start index to copy from.
unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
"Invalid variadic node");
// Copy all of the variadic operands, not including a potential glue
// input.
for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
i != e; ++i) {
SDValue V = NodeToMatch->getOperand(i);
if (V.getValueType() == MVT::Glue) break;
Ops.push_back(V);
}
}
// If this has chain/glue inputs, add them.
if (EmitNodeInfo & OPFL_Chain)
Ops.push_back(InputChain);
if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr)
Ops.push_back(InputGlue);
// Check whether any matched node could raise an FP exception. Since all
// such nodes must have a chain, it suffices to check ChainNodesMatched.
// We need to perform this check before potentially modifying one of the
// nodes via MorphNode.
bool MayRaiseFPException =
llvm::any_of(ChainNodesMatched, [this](SDNode *N) {
return mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept();
});
// Create the node.
MachineSDNode *Res = nullptr;
bool IsMorphNodeTo =
Opcode == OPC_MorphNodeTo ||
(Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2GlueOutput);
if (!IsMorphNodeTo) {
// If this is a normal EmitNode command, just create the new node and
// add the results to the RecordedNodes list.
Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch),
VTList, Ops);
// Add all the non-glue/non-chain results to the RecordedNodes list.
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
nullptr));
}
} else {
assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE &&
"NodeToMatch was removed partway through selection");
SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N,
SDNode *E) {
CurDAG->salvageDebugInfo(*N);
auto &Chain = ChainNodesMatched;
assert((!E || !is_contained(Chain, N)) &&
"Chain node replaced during MorphNode");
llvm::erase(Chain, N);
});
Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList,
Ops, EmitNodeInfo));
}
// Set the NoFPExcept flag when no original matched node could
// raise an FP exception, but the new node potentially might.
if (!MayRaiseFPException && mayRaiseFPException(Res)) {
SDNodeFlags Flags = Res->getFlags();
Flags.setNoFPExcept(true);
Res->setFlags(Flags);
}
// If the node had chain/glue results, update our notion of the current
// chain and glue.
if (EmitNodeInfo & OPFL_GlueOutput) {
InputGlue = SDValue(Res, VTs.size()-1);
if (EmitNodeInfo & OPFL_Chain)
InputChain = SDValue(Res, VTs.size()-2);
} else if (EmitNodeInfo & OPFL_Chain)
InputChain = SDValue(Res, VTs.size()-1);
// If the OPFL_MemRefs glue is set on this node, slap all of the
// accumulated memrefs onto it.
//
// FIXME: This is vastly incorrect for patterns with multiple-output
// instructions that access memory, and for ComplexPatterns that match
// loads.
if (EmitNodeInfo & OPFL_MemRefs) {
// Only attach load or store memory operands if the generated
// instruction may load or store.
const MCInstrDesc &MCID = TII->get(TargetOpc);
bool mayLoad = MCID.mayLoad();
bool mayStore = MCID.mayStore();
// We expect to have relatively few of these, so just filter them into a
// temporary buffer from which we can easily add them to the instruction.
SmallVector<MachineMemOperand *, 4> FilteredMemRefs;
for (MachineMemOperand *MMO : MatchedMemRefs) {
if (MMO->isLoad()) {
if (mayLoad)
FilteredMemRefs.push_back(MMO);
} else if (MMO->isStore()) {
if (mayStore)
FilteredMemRefs.push_back(MMO);
} else {
FilteredMemRefs.push_back(MMO);
}
}
CurDAG->setNodeMemRefs(Res, FilteredMemRefs);
}
LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs()
<< " Dropping mem operands\n";
dbgs() << " " << (IsMorphNodeTo ? "Morphed" : "Created")
<< " node: ";
Res->dump(CurDAG););
// If this was a MorphNodeTo then we're completely done!
if (IsMorphNodeTo) {
// Update chain uses.
UpdateChains(Res, InputChain, ChainNodesMatched, true);
return;
}
continue;
}
case OPC_CompleteMatch: {
// The match has been completed, and any new nodes (if any) have been
// created. Patch up references to the matched dag to use the newly
// created nodes.
unsigned NumResults = MatcherTable[MatcherIndex++];
for (unsigned i = 0; i != NumResults; ++i) {
unsigned ResSlot = MatcherTable[MatcherIndex++];
if (ResSlot & 128)
ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
SDValue Res = RecordedNodes[ResSlot].first;
assert(i < NodeToMatch->getNumValues() &&
NodeToMatch->getValueType(i) != MVT::Other &&
NodeToMatch->getValueType(i) != MVT::Glue &&
"Invalid number of results to complete!");
assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
NodeToMatch->getValueType(i) == MVT::iPTR ||
Res.getValueType() == MVT::iPTR ||
NodeToMatch->getValueType(i).getSizeInBits() ==
Res.getValueSizeInBits()) &&
"invalid replacement");
ReplaceUses(SDValue(NodeToMatch, i), Res);
}
// Update chain uses.
UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false);
// If the root node defines glue, we need to update it to the glue result.
// TODO: This never happens in our tests and I think it can be removed /
// replaced with an assert, but doing it this way keeps the change NFC.
if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) ==
MVT::Glue &&
InputGlue.getNode())
ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1),
InputGlue);
assert(NodeToMatch->use_empty() &&
"Didn't replace all uses of the node?");
CurDAG->RemoveDeadNode(NodeToMatch);
return;
}
}
// If the code reached this point, then the match failed. See if there is
// another child to try in the current 'Scope', otherwise pop it until we
// find a case to check.
LLVM_DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex
<< "\n");
++NumDAGIselRetries;
while (true) {
if (MatchScopes.empty()) {
CannotYetSelect(NodeToMatch);
return;
}
// Restore the interpreter state back to the point where the scope was
// formed.
MatchScope &LastScope = MatchScopes.back();
RecordedNodes.resize(LastScope.NumRecordedNodes);
NodeStack.clear();
NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
N = NodeStack.back();
if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
MatcherIndex = LastScope.FailIndex;
LLVM_DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n");
InputChain = LastScope.InputChain;
InputGlue = LastScope.InputGlue;
if (!LastScope.HasChainNodesMatched)
ChainNodesMatched.clear();
// Check to see what the offset is at the new MatcherIndex. If it is zero
// we have reached the end of this scope, otherwise we have another child
// in the current scope to try.
unsigned NumToSkip = MatcherTable[MatcherIndex++];
if (NumToSkip & 128)
NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
// If we have another child in this scope to match, update FailIndex and
// try it.
if (NumToSkip != 0) {
LastScope.FailIndex = MatcherIndex+NumToSkip;
break;
}
// End of this scope, pop it and try the next child in the containing
// scope.
MatchScopes.pop_back();
}
}
}
/// Return whether the node may raise an FP exception.
bool SelectionDAGISel::mayRaiseFPException(SDNode *N) const {
// For machine opcodes, consult the MCID flag.
if (N->isMachineOpcode()) {
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
return MCID.mayRaiseFPException();
}
// For ISD opcodes, only StrictFP opcodes may raise an FP
// exception.
if (N->isTargetOpcode())
return N->isTargetStrictFPOpcode();
return N->isStrictFPOpcode();
}
bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return false;
// Detect when "or" is used to add an offset to a stack object.
if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
MachineFrameInfo &MFI = MF->getFrameInfo();
Align A = MFI.getObjectAlign(FN->getIndex());
int32_t Off = C->getSExtValue();
// If the alleged offset fits in the zero bits guaranteed by
// the alignment, then this or is really an add.
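// For example, with a 16-byte-aligned object the low four bits of the base
// address are known zero, so or-ing in any offset in [0, 15] is the same
// as adding it.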
return (Off >= 0) && (((A.value() - 1) & Off) == unsigned(Off));
}
return false;
}
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
std::string msg;
raw_string_ostream Msg(msg);
Msg << "Cannot select: ";
if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_VOID) {
N->printrFull(Msg, CurDAG);
Msg << "\nIn function: " << MF->getName();
} else {
bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
unsigned iid = N->getConstantOperandVal(HasInputChain);
if (iid < Intrinsic::num_intrinsics)
Msg << "intrinsic %" << Intrinsic::getBaseName((Intrinsic::ID)iid);
else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
Msg << "target intrinsic %" << TII->getName(iid);
else
Msg << "unknown intrinsic #" << iid;
}
report_fatal_error(Twine(msg));
}
diff --git a/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp
index 0a9498f051cb..46896d3cdf7d 100644
--- a/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/BasicBlock.cpp
@@ -1,1159 +1,1167 @@
//===-- BasicBlock.cpp - Implement BasicBlock related methods -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BasicBlock class for the IR library.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/BasicBlock.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugProgramInstruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "LLVMContextImpl.h"
using namespace llvm;
#define DEBUG_TYPE "ir"
STATISTIC(NumInstrRenumberings, "Number of renumberings across all blocks");
cl::opt<bool> UseNewDbgInfoFormat(
"experimental-debuginfo-iterators",
cl::desc("Enable communicating debuginfo positions through iterators, "
"eliminating intrinsics. Has no effect if "
"--preserve-input-debuginfo-format=true."),
cl::init(true));
cl::opt<cl::boolOrDefault> PreserveInputDbgFormat(
"preserve-input-debuginfo-format", cl::Hidden,
cl::desc("When set to true, IR files will be processed and printed in "
"their current debug info format, regardless of default behaviour "
"or other flags passed. Has no effect if input IR does not "
"contain debug records or intrinsics. Ignored in llvm-link, "
"llvm-lto, and llvm-lto2."));
bool WriteNewDbgInfoFormatToBitcode /*set default value in cl::init() below*/;
cl::opt<bool, true> WriteNewDbgInfoFormatToBitcode2(
"write-experimental-debuginfo-iterators-to-bitcode", cl::Hidden,
cl::location(WriteNewDbgInfoFormatToBitcode), cl::init(true));
DbgMarker *BasicBlock::createMarker(Instruction *I) {
assert(IsNewDbgInfoFormat &&
"Tried to create a marker in a non new debug-info block!");
if (I->DebugMarker)
return I->DebugMarker;
DbgMarker *Marker = new DbgMarker();
Marker->MarkedInstr = I;
I->DebugMarker = Marker;
return Marker;
}
DbgMarker *BasicBlock::createMarker(InstListType::iterator It) {
assert(IsNewDbgInfoFormat &&
"Tried to create a marker in a non new debug-info block!");
if (It != end())
return createMarker(&*It);
DbgMarker *DM = getTrailingDbgRecords();
if (DM)
return DM;
DM = new DbgMarker();
setTrailingDbgRecords(DM);
return DM;
}
void BasicBlock::convertToNewDbgValues() {
IsNewDbgInfoFormat = true;
// Iterate over all instructions in the instruction list, collecting debug
// info intrinsics and converting them to DbgRecords. Once we find a "real"
// instruction, attach all those DbgRecords to a DbgMarker in that
// instruction.
SmallVector<DbgRecord *, 4> DbgVarRecs;
for (Instruction &I : make_early_inc_range(InstList)) {
assert(!I.DebugMarker && "DebugMarker already set on old-format instrs?");
if (DbgVariableIntrinsic *DVI = dyn_cast<DbgVariableIntrinsic>(&I)) {
// Convert this dbg.value to a DbgVariableRecord.
DbgVariableRecord *Value = new DbgVariableRecord(DVI);
DbgVarRecs.push_back(Value);
DVI->eraseFromParent();
continue;
}
if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(&I)) {
DbgVarRecs.push_back(
new DbgLabelRecord(DLI->getLabel(), DLI->getDebugLoc()));
DLI->eraseFromParent();
continue;
}
if (DbgVarRecs.empty())
continue;
// Create a marker to store DbgRecords in.
createMarker(&I);
DbgMarker *Marker = I.DebugMarker;
for (DbgRecord *DVR : DbgVarRecs)
Marker->insertDbgRecord(DVR, false);
DbgVarRecs.clear();
}
}
void BasicBlock::convertFromNewDbgValues() {
invalidateOrders();
IsNewDbgInfoFormat = false;
// Iterate over the block, finding instructions annotated with DbgMarkers.
// Convert any attached DbgRecords to debug intrinsics and insert ahead of the
// instruction.
for (auto &Inst : *this) {
if (!Inst.DebugMarker)
continue;
DbgMarker &Marker = *Inst.DebugMarker;
for (DbgRecord &DR : Marker.getDbgRecordRange())
InstList.insert(Inst.getIterator(),
DR.createDebugIntrinsic(getModule(), nullptr));
Marker.eraseFromParent();
}
// Assume no trailing DbgRecords: we could technically create them at the end
// of the block, after a terminator, but this would be non-canonical and
// indicates that something else is broken somewhere.
assert(!getTrailingDbgRecords());
}
#ifndef NDEBUG
void BasicBlock::dumpDbgValues() const {
for (auto &Inst : *this) {
if (!Inst.DebugMarker)
continue;
dbgs() << "@ " << Inst.DebugMarker << " ";
Inst.DebugMarker->dump();
};
}
#endif
void BasicBlock::setIsNewDbgInfoFormat(bool NewFlag) {
if (NewFlag && !IsNewDbgInfoFormat)
convertToNewDbgValues();
else if (!NewFlag && IsNewDbgInfoFormat)
convertFromNewDbgValues();
}
void BasicBlock::setNewDbgInfoFormatFlag(bool NewFlag) {
IsNewDbgInfoFormat = NewFlag;
}
ValueSymbolTable *BasicBlock::getValueSymbolTable() {
if (Function *F = getParent())
return F->getValueSymbolTable();
return nullptr;
}
LLVMContext &BasicBlock::getContext() const {
return getType()->getContext();
}
template <> void llvm::invalidateParentIListOrdering(BasicBlock *BB) {
BB->invalidateOrders();
}
// Explicit instantiation of SymbolTableListTraits since some of the methods
// are not in the public header file...
template class llvm::SymbolTableListTraits<
Instruction, ilist_iterator_bits<true>, ilist_parent<BasicBlock>>;
BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
BasicBlock *InsertBefore)
: Value(Type::getLabelTy(C), Value::BasicBlockVal),
IsNewDbgInfoFormat(UseNewDbgInfoFormat), Parent(nullptr) {
if (NewParent)
insertInto(NewParent, InsertBefore);
else
assert(!InsertBefore &&
"Cannot insert block before another block with no function!");
end().getNodePtr()->setParent(this);
setName(Name);
if (NewParent)
setIsNewDbgInfoFormat(NewParent->IsNewDbgInfoFormat);
}
void BasicBlock::insertInto(Function *NewParent, BasicBlock *InsertBefore) {
assert(NewParent && "Expected a parent");
assert(!Parent && "Already has a parent");
if (InsertBefore)
NewParent->insert(InsertBefore->getIterator(), this);
else
NewParent->insert(NewParent->end(), this);
setIsNewDbgInfoFormat(NewParent->IsNewDbgInfoFormat);
}
BasicBlock::~BasicBlock() {
validateInstrOrdering();
// If the address of the block is taken and it is being deleted (e.g. because
// it is dead), this means that there is either a dangling constant expr
// hanging off the block, or an undefined use of the block (source code
// expecting the address of a label to keep the block alive even though there
// is no indirect branch). Handle these cases by zapping the BlockAddress
// nodes. There are no other possible uses at this point.
if (hasAddressTaken()) {
assert(!use_empty() && "There should be at least one blockaddress!");
Constant *Replacement =
ConstantInt::get(llvm::Type::getInt32Ty(getContext()), 1);
while (!use_empty()) {
BlockAddress *BA = cast<BlockAddress>(user_back());
BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement,
BA->getType()));
BA->destroyConstant();
}
}
assert(getParent() == nullptr && "BasicBlock still linked into the program!");
dropAllReferences();
for (auto &Inst : *this) {
if (!Inst.DebugMarker)
continue;
Inst.DebugMarker->eraseFromParent();
}
InstList.clear();
}
void BasicBlock::setParent(Function *parent) {
// Set Parent=parent, updating instruction symtab entries as appropriate.
InstList.setSymTabObject(&Parent, parent);
}
iterator_range<filter_iterator<BasicBlock::const_iterator,
std::function<bool(const Instruction &)>>>
BasicBlock::instructionsWithoutDebug(bool SkipPseudoOp) const {
std::function<bool(const Instruction &)> Fn = [=](const Instruction &I) {
return !isa<DbgInfoIntrinsic>(I) &&
!(SkipPseudoOp && isa<PseudoProbeInst>(I));
};
return make_filter_range(*this, Fn);
}
iterator_range<
filter_iterator<BasicBlock::iterator, std::function<bool(Instruction &)>>>
BasicBlock::instructionsWithoutDebug(bool SkipPseudoOp) {
std::function<bool(Instruction &)> Fn = [=](Instruction &I) {
return !isa<DbgInfoIntrinsic>(I) &&
!(SkipPseudoOp && isa<PseudoProbeInst>(I));
};
return make_filter_range(*this, Fn);
}
filter_iterator<BasicBlock::const_iterator,
std::function<bool(const Instruction &)>>::difference_type
BasicBlock::sizeWithoutDebug() const {
return std::distance(instructionsWithoutDebug().begin(),
instructionsWithoutDebug().end());
}
void BasicBlock::removeFromParent() {
getParent()->getBasicBlockList().remove(getIterator());
}
iplist<BasicBlock>::iterator BasicBlock::eraseFromParent() {
return getParent()->getBasicBlockList().erase(getIterator());
}
void BasicBlock::moveBefore(SymbolTableList<BasicBlock>::iterator MovePos) {
getParent()->splice(MovePos, getParent(), getIterator());
}
void BasicBlock::moveAfter(BasicBlock *MovePos) {
MovePos->getParent()->splice(++MovePos->getIterator(), getParent(),
getIterator());
}
const Module *BasicBlock::getModule() const {
return getParent()->getParent();
}
const DataLayout &BasicBlock::getDataLayout() const {
return getModule()->getDataLayout();
}
const CallInst *BasicBlock::getTerminatingMustTailCall() const {
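// Recognizes the canonical musttail shape, roughly:
//   %v = musttail call i32 @f(...)
//   ret i32 %v
// optionally with a bitcast of %v between the call and the return.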
if (InstList.empty())
return nullptr;
const ReturnInst *RI = dyn_cast<ReturnInst>(&InstList.back());
if (!RI || RI == &InstList.front())
return nullptr;
const Instruction *Prev = RI->getPrevNode();
if (!Prev)
return nullptr;
if (Value *RV = RI->getReturnValue()) {
if (RV != Prev)
return nullptr;
// Look through the optional bitcast.
if (auto *BI = dyn_cast<BitCastInst>(Prev)) {
RV = BI->getOperand(0);
Prev = BI->getPrevNode();
if (!Prev || RV != Prev)
return nullptr;
}
}
if (auto *CI = dyn_cast<CallInst>(Prev)) {
if (CI->isMustTailCall())
return CI;
}
return nullptr;
}
const CallInst *BasicBlock::getTerminatingDeoptimizeCall() const {
if (InstList.empty())
return nullptr;
auto *RI = dyn_cast<ReturnInst>(&InstList.back());
if (!RI || RI == &InstList.front())
return nullptr;
if (auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode()))
if (Function *F = CI->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize)
return CI;
return nullptr;
}
const CallInst *BasicBlock::getPostdominatingDeoptimizeCall() const {
const BasicBlock *BB = this;
SmallPtrSet<const BasicBlock *, 8> Visited;
Visited.insert(BB);
while (auto *Succ = BB->getUniqueSuccessor()) {
if (!Visited.insert(Succ).second)
return nullptr;
BB = Succ;
}
return BB->getTerminatingDeoptimizeCall();
}
const Instruction *BasicBlock::getFirstMayFaultInst() const {
if (InstList.empty())
return nullptr;
for (const Instruction &I : *this)
if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallBase>(I))
return &I;
return nullptr;
}
const Instruction* BasicBlock::getFirstNonPHI() const {
for (const Instruction &I : *this)
if (!isa<PHINode>(I))
return &I;
return nullptr;
}
BasicBlock::const_iterator BasicBlock::getFirstNonPHIIt() const {
const Instruction *I = getFirstNonPHI();
if (!I)
return end();
BasicBlock::const_iterator It = I->getIterator();
// Set the head-inclusive bit to indicate that this iterator includes
// any debug-info at the start of the block. This is a no-op unless the
// appropriate CMake flag is set.
It.setHeadBit(true);
return It;
}
const Instruction *BasicBlock::getFirstNonPHIOrDbg(bool SkipPseudoOp) const {
for (const Instruction &I : *this) {
if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I))
continue;
if (SkipPseudoOp && isa<PseudoProbeInst>(I))
continue;
return &I;
}
return nullptr;
}
const Instruction *
BasicBlock::getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp) const {
for (const Instruction &I : *this) {
if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I))
continue;
if (I.isLifetimeStartOrEnd())
continue;
if (SkipPseudoOp && isa<PseudoProbeInst>(I))
continue;
return &I;
}
return nullptr;
}
BasicBlock::const_iterator BasicBlock::getFirstInsertionPt() const {
const Instruction *FirstNonPHI = getFirstNonPHI();
if (!FirstNonPHI)
return end();
const_iterator InsertPt = FirstNonPHI->getIterator();
if (InsertPt->isEHPad()) ++InsertPt;
// Set the head-inclusive bit to indicate that this iterator includes
// any debug-info at the start of the block. This is a no-op unless the
// appropriate CMake flag is set.
InsertPt.setHeadBit(true);
return InsertPt;
}
BasicBlock::const_iterator BasicBlock::getFirstNonPHIOrDbgOrAlloca() const {
const Instruction *FirstNonPHI = getFirstNonPHI();
if (!FirstNonPHI)
return end();
const_iterator InsertPt = FirstNonPHI->getIterator();
if (InsertPt->isEHPad())
++InsertPt;
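// In the entry block, additionally skip past leading static allocas (and
// any debug or pseudo-probe instructions interleaved with them).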
if (isEntryBlock()) {
const_iterator End = end();
while (InsertPt != End &&
(isa<AllocaInst>(*InsertPt) || isa<DbgInfoIntrinsic>(*InsertPt) ||
isa<PseudoProbeInst>(*InsertPt))) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&*InsertPt)) {
if (!AI->isStaticAlloca())
break;
}
++InsertPt;
}
}
return InsertPt;
}
void BasicBlock::dropAllReferences() {
for (Instruction &I : *this)
I.dropAllReferences();
}
const BasicBlock *BasicBlock::getSinglePredecessor() const {
const_pred_iterator PI = pred_begin(this), E = pred_end(this);
if (PI == E) return nullptr; // No preds.
const BasicBlock *ThePred = *PI;
++PI;
return (PI == E) ? ThePred : nullptr /*multiple preds*/;
}
const BasicBlock *BasicBlock::getUniquePredecessor() const {
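// Unlike getSinglePredecessor, duplicate edges are tolerated here: e.g. if a
// switch in %a has two cases that both target this block,
// getSinglePredecessor returns null but getUniquePredecessor returns %a.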
const_pred_iterator PI = pred_begin(this), E = pred_end(this);
if (PI == E) return nullptr; // No preds.
const BasicBlock *PredBB = *PI;
++PI;
for (;PI != E; ++PI) {
if (*PI != PredBB)
return nullptr;
// The same predecessor appears multiple times in the predecessor list.
// This is OK.
}
return PredBB;
}
bool BasicBlock::hasNPredecessors(unsigned N) const {
return hasNItems(pred_begin(this), pred_end(this), N);
}
bool BasicBlock::hasNPredecessorsOrMore(unsigned N) const {
return hasNItemsOrMore(pred_begin(this), pred_end(this), N);
}
const BasicBlock *BasicBlock::getSingleSuccessor() const {
const_succ_iterator SI = succ_begin(this), E = succ_end(this);
if (SI == E) return nullptr; // no successors
const BasicBlock *TheSucc = *SI;
++SI;
return (SI == E) ? TheSucc : nullptr /* multiple successors */;
}
const BasicBlock *BasicBlock::getUniqueSuccessor() const {
const_succ_iterator SI = succ_begin(this), E = succ_end(this);
if (SI == E) return nullptr; // No successors
const BasicBlock *SuccBB = *SI;
++SI;
for (;SI != E; ++SI) {
if (*SI != SuccBB)
return nullptr;
// The same successor appears multiple times in the successor list.
// This is OK.
}
return SuccBB;
}
iterator_range<BasicBlock::phi_iterator> BasicBlock::phis() {
PHINode *P = empty() ? nullptr : dyn_cast<PHINode>(&*begin());
return make_range<phi_iterator>(P, nullptr);
}
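// A typical use of phis() (illustrative; OldPred/NewPred are placeholders):
//   for (PHINode &PN : BB->phis())
//     PN.replaceIncomingBlockWith(OldPred, NewPred);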
void BasicBlock::removePredecessor(BasicBlock *Pred,
bool KeepOneInputPHIs) {
// Use hasNUsesOrMore to bound the cost of this assertion for complex CFGs.
assert((hasNUsesOrMore(16) || llvm::is_contained(predecessors(this), Pred)) &&
"Pred is not a predecessor!");
// Return early if there are no PHI nodes to update.
if (empty() || !isa<PHINode>(begin()))
return;
unsigned NumPreds = cast<PHINode>(front()).getNumIncomingValues();
for (PHINode &Phi : make_early_inc_range(phis())) {
Phi.removeIncomingValue(Pred, !KeepOneInputPHIs);
if (KeepOneInputPHIs)
continue;
// If we have a single predecessor, removeIncomingValue may have erased the
// PHI node itself.
if (NumPreds == 1)
continue;
// Try to replace the PHI node with a constant value.
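// e.g. if removing Pred leaves "%p = phi i32 [ %v, %a ], [ %v, %b ]", the
// phi folds to %v.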
if (Value *PhiConstant = Phi.hasConstantValue()) {
Phi.replaceAllUsesWith(PhiConstant);
Phi.eraseFromParent();
}
}
}
bool BasicBlock::canSplitPredecessors() const {
const Instruction *FirstNonPHI = getFirstNonPHI();
if (isa<LandingPadInst>(FirstNonPHI))
return true;
// This is perhaps a little conservative because constructs like
// cleanuppads are pretty easy to split. However, SplitBlockPredecessors
// cannot handle such things just yet.
if (FirstNonPHI->isEHPad())
return false;
return true;
}
bool BasicBlock::isLegalToHoistInto() const {
auto *Term = getTerminator();
// No terminator means the block is under construction.
if (!Term)
return true;
// If the block has no successors, there can be no instructions to hoist.
assert(Term->getNumSuccessors() > 0);
// Instructions should not be hoisted across special terminators, which may
// have side effects or return values.
return !Term->isSpecialTerminator();
}
bool BasicBlock::isEntryBlock() const {
const Function *F = getParent();
assert(F && "Block must have a parent function to use this API");
return this == &F->getEntryBlock();
}
BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName,
bool Before) {
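// Sketch: splitting "BB: I1; I2; Term" at I2 yields
//   BB:  I1; br %New
//   New: I2; Term
// with PHIs in Term's successors updated to refer to New.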
if (Before)
return splitBasicBlockBefore(I, BBName);
assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
assert(I != InstList.end() &&
"Trying to get me to create degenerate basic block!");
BasicBlock *New = BasicBlock::Create(getContext(), BBName, getParent(),
this->getNextNode());
// Save DebugLoc of split point before invalidating iterator.
DebugLoc Loc = I->getStableDebugLoc();
// Move all of the specified instructions from the original basic block into
// the new basic block.
New->splice(New->end(), this, I, end());
// Add a branch instruction to the newly formed basic block.
BranchInst *BI = BranchInst::Create(New, this);
BI->setDebugLoc(Loc);
// Now we must loop through all of the successors of the New block (which
// _were_ the successors of the 'this' block), and update any PHI nodes in
// successors. If there were PHI nodes in the successors, then they need to
// know that incoming branches will be from New, not from Old (this).
//
New->replaceSuccessorsPhiUsesWith(this, New);
return New;
}
BasicBlock *BasicBlock::splitBasicBlockBefore(iterator I, const Twine &BBName) {
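// Sketch: splitting "BB: I1; I2; Term" before I2 yields
//   New: I1; br %BB
//   BB:  I2; Term
// with predecessors of BB retargeted to New.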
assert(getTerminator() &&
"Can't use splitBasicBlockBefore on degenerate BB!");
assert(I != InstList.end() &&
"Trying to get me to create degenerate basic block!");
assert((!isa<PHINode>(*I) || getSinglePredecessor()) &&
"cannot split on multi incoming phis");
BasicBlock *New = BasicBlock::Create(getContext(), BBName, getParent(), this);
// Save DebugLoc of split point before invalidating iterator.
DebugLoc Loc = I->getDebugLoc();
// Move all of the specified instructions from the original basic block into
// the new basic block.
New->splice(New->end(), this, begin(), I);
// Loop through all of the predecessors of the 'this' block (which will be the
// predecessors of the New block), replace the specified successor 'this'
// block to point at the New block and update any PHI nodes in 'this' block.
// If there were PHI nodes in 'this' block, the PHI nodes are updated
// to reflect that the incoming branches will be from the New block and not
// from predecessors of the 'this' block.
// Save predecessors to separate vector before modifying them.
SmallVector<BasicBlock *, 4> Predecessors;
for (BasicBlock *Pred : predecessors(this))
Predecessors.push_back(Pred);
for (BasicBlock *Pred : Predecessors) {
Instruction *TI = Pred->getTerminator();
TI->replaceSuccessorWith(this, New);
this->replacePhiUsesWith(Pred, New);
}
// Add a branch instruction from "New" to "this" Block.
BranchInst *BI = BranchInst::Create(this, New);
BI->setDebugLoc(Loc);
return New;
}
BasicBlock::iterator BasicBlock::erase(BasicBlock::iterator FromIt,
BasicBlock::iterator ToIt) {
for (Instruction &I : make_early_inc_range(make_range(FromIt, ToIt)))
I.eraseFromParent();
return ToIt;
}
void BasicBlock::replacePhiUsesWith(BasicBlock *Old, BasicBlock *New) {
// N.B. This might not be a complete BasicBlock, so don't assume
// that it ends with a non-phi instruction.
for (Instruction &I : *this) {
PHINode *PN = dyn_cast<PHINode>(&I);
if (!PN)
break;
PN->replaceIncomingBlockWith(Old, New);
}
}
void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *Old,
BasicBlock *New) {
Instruction *TI = getTerminator();
if (!TI)
// Cope with being called on a BasicBlock that doesn't have a terminator
// yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
return;
for (BasicBlock *Succ : successors(TI))
Succ->replacePhiUsesWith(Old, New);
}
void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
this->replaceSuccessorsPhiUsesWith(this, New);
}
bool BasicBlock::isLandingPad() const {
return isa<LandingPadInst>(getFirstNonPHI());
}
const LandingPadInst *BasicBlock::getLandingPadInst() const {
return dyn_cast<LandingPadInst>(getFirstNonPHI());
}
std::optional<uint64_t> BasicBlock::getIrrLoopHeaderWeight() const {
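// The terminator's irr_loop metadata is expected to look like this sketch:
//   !{!"loop_header_weight", i64 <weight>}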
const Instruction *TI = getTerminator();
if (MDNode *MDIrrLoopHeader =
TI->getMetadata(LLVMContext::MD_irr_loop)) {
MDString *MDName = cast<MDString>(MDIrrLoopHeader->getOperand(0));
if (MDName->getString() == "loop_header_weight") {
auto *CI = mdconst::extract<ConstantInt>(MDIrrLoopHeader->getOperand(1));
return std::optional<uint64_t>(CI->getValue().getZExtValue());
}
}
return std::nullopt;
}
BasicBlock::iterator llvm::skipDebugIntrinsics(BasicBlock::iterator It) {
while (isa<DbgInfoIntrinsic>(It))
++It;
return It;
}
void BasicBlock::renumberInstructions() {
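// Give every instruction a monotonically increasing position number; with the
// cached order valid, queries such as Instruction::comesBefore() can compare
// positions instead of walking the block.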
unsigned Order = 0;
for (Instruction &I : *this)
I.Order = Order++;
// Set the bit to indicate that the instruction order is valid and cached.
BasicBlockBits Bits = getBasicBlockBits();
Bits.InstrOrderValid = true;
setBasicBlockBits(Bits);
NumInstrRenumberings++;
}
void BasicBlock::flushTerminatorDbgRecords() {
// If we erase the terminator in a block, any DbgRecords will sink and "fall
// off the end", existing after any terminator that gets inserted. With
// dbg.value intrinsics we would just insert the terminator at end() and
// the dbg.values would come before the terminator. With DbgRecords, we must
// do this manually.
// To get out of this unfortunate form, whenever we insert a terminator,
// check whether there's anything trailing at the end and move those
// DbgRecords in front of the terminator.
// Do nothing if we're not in new debug-info format.
if (!IsNewDbgInfoFormat)
return;
// If there's no terminator, there's nothing to do.
Instruction *Term = getTerminator();
if (!Term)
return;
// Are there any dangling DbgRecords?
DbgMarker *TrailingDbgRecords = getTrailingDbgRecords();
if (!TrailingDbgRecords)
return;
// Transfer DbgRecords from the trailing position onto the terminator.
createMarker(Term);
Term->DebugMarker->absorbDebugValues(*TrailingDbgRecords, false);
TrailingDbgRecords->eraseFromParent();
deleteTrailingDbgRecords();
}
void BasicBlock::spliceDebugInfoEmptyBlock(BasicBlock::iterator Dest,
BasicBlock *Src,
BasicBlock::iterator First,
BasicBlock::iterator Last) {
// Imagine the following:
//
// bb1:
// dbg.value(...
// ret i32 0
//
// If an optimisation pass attempts to splice the contents of the block from
// BB1->begin() to BB1->getTerminator(), then the dbg.value will be
// transferred to the destination.
// However, in the "new" DbgRecord format for debug-info, that range is empty:
// begin() returns an iterator to the terminator, as there will only be a
// single instruction in the block. We must piece together from the bits set
// in the iterators whether there was the intention to transfer any debug
// info.
// If we're not in "new" debug-info format, do nothing.
if (!IsNewDbgInfoFormat)
return;
assert(First == Last);
bool InsertAtHead = Dest.getHeadBit();
bool ReadFromHead = First.getHeadBit();
// If the source block is completely empty, including no terminator, then
// transfer any trailing DbgRecords that are still hanging around. This can
// occur when a block is optimised away and the terminator has been moved
// somewhere else.
if (Src->empty()) {
DbgMarker *SrcTrailingDbgRecords = Src->getTrailingDbgRecords();
if (!SrcTrailingDbgRecords)
return;
Dest->adoptDbgRecords(Src, Src->end(), InsertAtHead);
// adoptDbgRecords should have released the trailing DbgRecords.
assert(!Src->getTrailingDbgRecords());
return;
}
// There are instructions in this block; if the First iterator was created
// with begin() / getFirstInsertionPt() then the caller intended any
// debug-info at the start of the block to be transferred. Return otherwise.
if (Src->empty() || First != Src->begin() || !ReadFromHead)
return;
// Is there actually anything to transfer?
if (!First->hasDbgRecords())
return;
createMarker(Dest)->absorbDebugValues(*First->DebugMarker, InsertAtHead);
return;
}
void BasicBlock::spliceDebugInfo(BasicBlock::iterator Dest, BasicBlock *Src,
BasicBlock::iterator First,
BasicBlock::iterator Last) {
/* Do a quick normalisation before calling the real splice implementation. We
might be operating on a degenerate basic block that has no instructions
in it, a legitimate transient state. In that case, Dest will be end() and
any DbgRecords are temporarily stored in the TrailingDbgRecords map in
LLVMContext. We might illustrate it thus:
Dest
|
this-block: ~~~~~~~~
Src-block: ++++B---B---B---B:::C
| |
First Last
However: does the caller expect the "~" DbgRecords to end up before or
after the spliced segment? This is communicated in the "Head" bit of Dest,
which signals whether the caller called begin() or end() on this block.
If the head bit is set, then all is well, we leave DbgRecords trailing just
like how dbg.value instructions would trail after instructions spliced to
the beginning of this block.
If the head bit isn't set, then try to jam the "~" DbgRecords onto the
front of the First instruction, then splice like normal, which joins the
"~" DbgRecords with the "+" DbgRecords. However if the "+" DbgRecords are
supposed to be left behind in Src, then:
* detach the "+" DbgRecords,
* move the "~" DbgRecords onto First,
* splice like normal,
* replace the "+" DbgRecords onto the Last position.
Complicated, but gets the job done. */
// If we're inserting at end(), and not in front of dangling DbgRecords, then
// move the DbgRecords onto "First". They'll then be moved naturally in the
// splice process.
DbgMarker *MoreDanglingDbgRecords = nullptr;
DbgMarker *OurTrailingDbgRecords = getTrailingDbgRecords();
if (Dest == end() && !Dest.getHeadBit() && OurTrailingDbgRecords) {
// Are the "+" DbgRecords not supposed to move? If so, detach them
// temporarily.
if (!First.getHeadBit() && First->hasDbgRecords()) {
MoreDanglingDbgRecords = Src->getMarker(First);
MoreDanglingDbgRecords->removeFromParent();
}
if (First->hasDbgRecords()) {
// Place them at the front; it would look like this:
// Dest
// |
// this-block:
// Src-block: ~~~~~~~~++++B---B---B---B:::C
// | |
// First Last
First->adoptDbgRecords(this, end(), true);
} else {
// No current marker, create one and absorb in. (FIXME: we can avoid an
// allocation in the future).
DbgMarker *CurMarker = Src->createMarker(&*First);
CurMarker->absorbDebugValues(*OurTrailingDbgRecords, false);
OurTrailingDbgRecords->eraseFromParent();
}
deleteTrailingDbgRecords();
First.setHeadBit(true);
}
// Call the main debug-info-splicing implementation.
spliceDebugInfoImpl(Dest, Src, First, Last);
// Do we have some "+" DbgRecords hanging around that weren't supposed to
// move, and we detached to make things easier?
if (!MoreDanglingDbgRecords)
return;
// FIXME: we could avoid an allocation here sometimes. (adoptDbgRecords
// requires an iterator).
DbgMarker *LastMarker = Src->createMarker(Last);
LastMarker->absorbDebugValues(*MoreDanglingDbgRecords, true);
MoreDanglingDbgRecords->eraseFromParent();
}
void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
BasicBlock::iterator First,
BasicBlock::iterator Last) {
// Find out where to _place_ these dbg.values; if InsertAtHead is specified,
// this will be at the start of Dest's debug value range, otherwise this is
// just Dest's marker.
bool InsertAtHead = Dest.getHeadBit();
bool ReadFromHead = First.getHeadBit();
// Use this flag to signal the abnormal case, where we don't want to copy the
// DbgRecords ahead of the "Last" position.
bool ReadFromTail = !Last.getTailBit();
bool LastIsEnd = (Last == Src->end());
/*
Here's an illustration of what we're about to do. We have two blocks, this
and Src, and two segments of list. Each instruction is marked by a capital
while potential DbgRecord debug-info is marked out by "-" characters and a
few other special characters (+:=) where I want to highlight what's going
on.
Dest
|
this-block: A----A----A ====A----A----A----A---A---A
Src-block ++++B---B---B---B:::C
| |
First Last
The splice method is going to take all the instructions from First up to
(but not including) Last and insert them in _front_ of Dest, forming one
long list. All the DbgRecords attached to instructions _between_ First and
Last need no maintenance. However, we have to do special things with the
DbgRecords marked with the +:= characters. We only have three positions:
should the "+" DbgRecords be transferred, and if so to where? Do we move the
":" DbgRecords? Would they go in front of the "=" DbgRecords, or should the
"=" DbgRecords go before "+" DbgRecords?
We're told which way it should be by the bits carried in the iterators. The
"Head" bit indicates whether the specified position is supposed to be at the
front of the attached DbgRecords (true) or not (false). The Tail bit is true
on the other end of a range: is the range intended to include DbgRecords up
to the end (false) or not (true).
FIXME: the tail bit doesn't need to be distinct from the head bit; we could
combine them.
Here are some examples of different configurations:
Dest.Head = true, First.Head = true, Last.Tail = false
this-block: A----A----A++++B---B---B---B:::====A----A----A----A---A---A
| |
First Dest
Whereas if we didn't want to read from the Src list,
Dest.Head = true, First.Head = false, Last.Tail = false
this-block: A----A----AB---B---B---B:::====A----A----A----A---A---A
| |
First Dest
Or if we didn't want to insert at the head of Dest:
Dest.Head = false, First.Head = false, Last.Tail = false
this-block: A----A----A====B---B---B---B:::A----A----A----A---A---A
| |
First Dest
Tests for these various configurations can be found in the unit test file
BasicBlockDbgInfoTest.cpp.
*/
// Detach the marker at Dest -- this lets us move the "====" DbgRecords
// around.
DbgMarker *DestMarker = nullptr;
if ((DestMarker = getMarker(Dest))) {
if (Dest == end()) {
assert(DestMarker == getTrailingDbgRecords());
deleteTrailingDbgRecords();
} else {
DestMarker->removeFromParent();
}
}
// If we're moving the tail range of DbgRecords (":::"), absorb them into the
// front of the DbgRecords at Dest.
if (ReadFromTail && Src->getMarker(Last)) {
DbgMarker *FromLast = Src->getMarker(Last);
if (LastIsEnd) {
- Dest->adoptDbgRecords(Src, Last, true);
- // adoptDbgRecords will release any trailers.
+ if (Dest == end()) {
+ // Absorb the trailing markers from Src.
+ assert(FromLast == Src->getTrailingDbgRecords());
+ createMarker(Dest)->absorbDebugValues(*FromLast, true);
+ FromLast->eraseFromParent();
+ Src->deleteTrailingDbgRecords();
+ } else {
+ // adoptDbgRecords will release any trailers.
+ Dest->adoptDbgRecords(Src, Last, true);
+ }
assert(!Src->getTrailingDbgRecords());
} else {
// FIXME: can we use adoptDbgRecords here to reduce allocations?
DbgMarker *OntoDest = createMarker(Dest);
OntoDest->absorbDebugValues(*FromLast, true);
}
}
// If we're _not_ reading from the head of First, i.e. the "++++" DbgRecords,
// move their markers onto Last so that they stay behind in the Src block;
// nothing else needs to happen to them.
if (!ReadFromHead && First->hasDbgRecords()) {
if (Last != Src->end()) {
Last->adoptDbgRecords(Src, First, true);
} else {
DbgMarker *OntoLast = Src->createMarker(Last);
DbgMarker *FromFirst = Src->createMarker(First);
// Always insert at front of Last.
OntoLast->absorbDebugValues(*FromFirst, true);
}
}
// Finally, do something with the "====" DbgRecords we detached.
if (DestMarker) {
if (InsertAtHead) {
// Insert them at the end of the DbgRecords at Dest. The ":::" DbgRecords
// might be in front of them.
DbgMarker *NewDestMarker = createMarker(Dest);
NewDestMarker->absorbDebugValues(*DestMarker, false);
} else {
// Insert them right at the start of the range we moved, ahead of First
// and the "++++" DbgRecords.
// This also covers the rare circumstance where we insert at end(), and we
// did not generate the iterator with begin() / getFirstInsertionPt(),
// meaning any trailing debug-info at the end of the block would
// "normally" have been pushed in front of "First". We move it there now.
DbgMarker *FirstMarker = createMarker(First);
FirstMarker->absorbDebugValues(*DestMarker, true);
}
DestMarker->eraseFromParent();
}
}
void BasicBlock::splice(iterator Dest, BasicBlock *Src, iterator First,
iterator Last) {
assert(Src->IsNewDbgInfoFormat == IsNewDbgInfoFormat);
#ifdef EXPENSIVE_CHECKS
// Check that First is before Last.
auto FromBBEnd = Src->end();
for (auto It = First; It != Last; ++It)
assert(It != FromBBEnd && "FromBeginIt not before FromEndIt!");
#endif // EXPENSIVE_CHECKS
// Lots of horrible special casing for empty transfers: even when First ==
// Last, in dbg.value mode the dbg.values between the two positions would
// have been spliced, so mirror that behaviour for DbgRecords.
if (First == Last) {
spliceDebugInfoEmptyBlock(Dest, Src, First, Last);
return;
}
// Handle non-instr debug-info specific juggling.
if (IsNewDbgInfoFormat)
spliceDebugInfo(Dest, Src, First, Last);
// And move the instructions.
getInstList().splice(Dest, Src->getInstList(), First, Last);
flushTerminatorDbgRecords();
}
void BasicBlock::insertDbgRecordAfter(DbgRecord *DR, Instruction *I) {
assert(IsNewDbgInfoFormat);
assert(I->getParent() == this);
iterator NextIt = std::next(I->getIterator());
DbgMarker *NextMarker = createMarker(NextIt);
NextMarker->insertDbgRecord(DR, true);
}
void BasicBlock::insertDbgRecordBefore(DbgRecord *DR,
InstListType::iterator Where) {
assert(Where == end() || Where->getParent() == this);
bool InsertAtHead = Where.getHeadBit();
DbgMarker *M = createMarker(Where);
M->insertDbgRecord(DR, InsertAtHead);
}
DbgMarker *BasicBlock::getNextMarker(Instruction *I) {
return getMarker(std::next(I->getIterator()));
}
DbgMarker *BasicBlock::getMarker(InstListType::iterator It) {
if (It == end()) {
DbgMarker *DM = getTrailingDbgRecords();
return DM;
}
return It->DebugMarker;
}
void BasicBlock::reinsertInstInDbgRecords(
Instruction *I, std::optional<DbgRecord::self_iterator> Pos) {
// "I" was originally removed from a position where it was
// immediately in front of Pos. Any DbgRecords on that position then "fell
// down" onto Pos. "I" has been re-inserted at the front of that wedge of
// DbgRecords, shuffle them around to represent the original positioning. To
// illustrate:
//
// Instructions: I1---I---I0
// DbgRecords: DDD DDD
//
// Instruction "I" removed,
//
// Instructions: I1------I0
// DbgRecords: DDDDDD
// ^Pos
//
// Instruction "I" re-inserted (now):
//
// Instructions: I1---I------I0
// DbgRecords: DDDDDD
// ^Pos
//
// After this method completes:
//
// Instructions: I1---I---I0
// DbgRecords: DDD DDD
// This happens if there were no DbgRecords on I0. Are there now DbgRecords
// there?
if (!Pos) {
DbgMarker *NextMarker = getNextMarker(I);
if (!NextMarker)
return;
if (NextMarker->StoredDbgRecords.empty())
return;
// There are DbgMarkers there now -- they fell down from "I".
DbgMarker *ThisMarker = createMarker(I);
ThisMarker->absorbDebugValues(*NextMarker, false);
return;
}
// Is there even a range of DbgRecords to move?
DbgMarker *DM = (*Pos)->getMarker();
auto Range = make_range(DM->StoredDbgRecords.begin(), (*Pos));
if (Range.begin() == Range.end())
return;
// Otherwise: splice.
DbgMarker *ThisMarker = createMarker(I);
assert(ThisMarker->StoredDbgRecords.empty());
ThisMarker->absorbDebugValues(Range, *DM, true);
}
#ifndef NDEBUG
/// In asserts builds, this checks the numbering. In non-asserts builds, it
/// is defined as a no-op inline function in BasicBlock.h.
void BasicBlock::validateInstrOrdering() const {
if (!isInstrOrderValid())
return;
const Instruction *Prev = nullptr;
for (const Instruction &I : *this) {
assert((!Prev || Prev->comesBefore(&I)) &&
"cached instruction ordering is incorrect");
Prev = &I;
}
}
#endif
void BasicBlock::setTrailingDbgRecords(DbgMarker *foo) {
getContext().pImpl->setTrailingDbgRecords(this, foo);
}
DbgMarker *BasicBlock::getTrailingDbgRecords() {
return getContext().pImpl->getTrailingDbgRecords(this);
}
void BasicBlock::deleteTrailingDbgRecords() {
getContext().pImpl->deleteTrailingDbgRecords(this);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index ba46ededc63a..87e057a468af 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1,5219 +1,5186 @@
//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// | | Higher address
// |-----------------------------------|
// | |
// | arguments passed on the stack |
// | |
// |-----------------------------------| <- sp
// | | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) doesn't get created until the
// main function body, after the prologue is run. However, it's depicted here
// for completeness.
//
// | | Higher address
// |-----------------------------------|
// | |
// | arguments passed on the stack |
// | |
// |-----------------------------------|
// | |
// | (Win64 only) varargs from reg |
// | |
// |-----------------------------------|
// | |
// | callee-saved gpr registers | <--.
// | | | On Darwin platforms these
// |- - - - - - - - - - - - - - - - - -| | callee saves are swapped,
// | prev_lr | | (frame record first)
// | prev_fp | <--'
// | async context if needed |
// | (a.k.a. "frame record") |
// |-----------------------------------| <- fp(=x29)
// | <hazard padding> |
// |-----------------------------------|
// | |
// | callee-saved fp/simd/SVE regs |
// | |
// |-----------------------------------|
// | |
// | SVE stack objects |
// | |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....| compile time; if present)
// |-----------------------------------|
// | local variables of fixed size |
// | including spill slots |
// | <FPR> |
// | <hazard padding> |
// | <GPR> |
// |-----------------------------------| <- bp(not defined by ABI,
// |.variable-sized.local.variables....| LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................| compile time)
// |-----------------------------------| <- sp
// | | Lower address
//
//
// To access data in a frame, a constant offset from one of the pointers (fp,
// bp, sp) must be computable at compile time. The sizes of the areas with a
// dotted background cannot be computed at compile-time if they are present,
// so all three of fp, bp and sp may need to be set up in order to access all
// contents of the frame, assuming all of the frame areas are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
// variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
// more-than-default alignment requirements.
//
// For Darwin platforms the frame-record (fp, lr) is stored at the top of the
// callee-saved area, since the unwind encoding does not allow for encoding
// this dynamically and existing tools depend on this layout. For other
// platforms, the frame-record is stored at the bottom of the (gpr) callee-saved
// area to allow SVE stack objects (allocated directly below the callee-saves,
// if available) to be accessed directly from the framepointer.
// The SVE spill/fill instructions have VL-scaled addressing modes such
// as:
// ldr z8, [fp, #-7 mul vl]
// For SVE the size of the vector length (VL) is not known at compile-time, so
// '#-7 mul vl' is an offset that can only be evaluated at runtime. With this
// layout, we don't need to add an unscaled offset to the framepointer before
// accessing the SVE object in the frame.
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// Outgoing function arguments must be at the bottom of the stack frame when
// calling another function. If we do not have variable-sized stack objects, we
// can allocate a "reserved call frame" area at the bottom of the local
// variable area, large enough for all outgoing calls. If we do have VLAs, then
// the stack pointer must be decremented and incremented around each call to
// make space for the arguments below the VLAs.
//
// FIXME: also explain the redzone concept.
//
// About stack hazards: Under some SME contexts, a coprocessor with its own
// separate cache can be used for FP operations. This can create hazards if the CPU
// and the SME unit try to access the same area of memory, including if the
// access is to an area of the stack. To try to alleviate this we attempt to
// introduce extra padding into the stack frame between FP and GPR accesses,
// controlled by the StackHazardSize option. Without changing the layout of the
// stack frame in the diagram above, a stack object of size StackHazardSize is
// added between GPR and FPR CSRs. Another is added to the stack objects
// section, and stack objects are sorted so that FPR > Hazard padding slot >
// GPRs (where possible). Unfortunately some things are not handled well (VLA
// area, arguments on the stack, objects with both GPR and FPR accesses), but if
// those are controlled by the user then the entire stack frame becomes GPR at
// the start/end with FPR in the middle, surrounded by Hazard padding.
//
// An example of the prologue:
//
// .globl __foo
// .align 2
// __foo:
// Ltmp0:
// .cfi_startproc
// .cfi_personality 155, ___gxx_personality_v0
// Leh_func_begin:
// .cfi_lsda 16, Lexception33
//
// stp xa,bx, [sp, -#offset]!
// ...
// stp x28, x27, [sp, #offset-32]
// stp fp, lr, [sp, #offset-16]
// add fp, sp, #offset - 16
// sub sp, sp, #1360
//
// The Stack:
// +-------------------------------------------+
// 10000 | ........ | ........ | ........ | ........ |
// 10004 | ........ | ........ | ........ | ........ |
// +-------------------------------------------+
// 10008 | ........ | ........ | ........ | ........ |
// 1000c | ........ | ........ | ........ | ........ |
// +===========================================+
// 10010 | X28 Register |
// 10014 | X28 Register |
// +-------------------------------------------+
// 10018 | X27 Register |
// 1001c | X27 Register |
// +===========================================+
// 10020 | Frame Pointer |
// 10024 | Frame Pointer |
// +-------------------------------------------+
// 10028 | Link Register |
// 1002c | Link Register |
// +===========================================+
// 10030 | ........ | ........ | ........ | ........ |
// 10034 | ........ | ........ | ........ | ........ |
// +-------------------------------------------+
// 10038 | ........ | ........ | ........ | ........ |
// 1003c | ........ | ........ | ........ | ........ |
// +-------------------------------------------+
//
// [sp] = 10030 :: >>initial value<<
// sp = 10020 :: stp fp, lr, [sp, #-16]!
// fp = sp == 10020 :: mov fp, sp
// [sp] == 10020 :: stp x28, x27, [sp, #-16]!
// sp == 10010 :: >>final value<<
//
// The frame pointer (w29) points to address 10020. If we use an offset of
// '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
// for w27, and -32 for w28:
//
// Ltmp1:
// .cfi_def_cfa w29, 16
// Ltmp2:
// .cfi_offset w30, -8
// Ltmp3:
// .cfi_offset w29, -16
// Ltmp4:
// .cfi_offset w27, -24
// Ltmp5:
// .cfi_offset w28, -32
//
//===----------------------------------------------------------------------===//
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <optional>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "frame-info"
static cl::opt<bool> EnableRedZone("aarch64-redzone",
cl::desc("enable use of redzone on AArch64"),
cl::init(false), cl::Hidden);
static cl::opt<bool> StackTaggingMergeSetTag(
"stack-tagging-merge-settag",
cl::desc("merge settag instruction in function epilog"), cl::init(true),
cl::Hidden);
static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
cl::desc("sort stack allocations"),
cl::init(true), cl::Hidden);
cl::opt<bool> EnableHomogeneousPrologEpilog(
"homogeneous-prolog-epilog", cl::Hidden,
cl::desc("Emit homogeneous prologue and epilogue for the size "
"optimization (default = off)"));
// Stack hazard padding size. 0 = disabled.
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
cl::init(0), cl::Hidden);
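// (Illustrative use: passing -aarch64-stack-hazard-size=1024 to llc inserts
// 1K of hazard padding.)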
// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
static cl::opt<unsigned>
StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
cl::Hidden);
// Whether to insert padding into non-streaming functions (for testing).
static cl::opt<bool>
StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
cl::init(false), cl::Hidden);
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
/// Returns how much of the incoming argument stack area (in bytes) we should
/// clean up in an epilogue. For the C calling convention this will be 0, for
/// guaranteed tail call conventions it can be positive (a normal return or a
/// tail call to a function that uses less stack space for arguments) or
/// negative (for a tail call to a function that needs more stack space than us
/// for arguments).
static int64_t getArgumentStackToRestore(MachineFunction &MF,
MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool IsTailCallReturn = (MBB.end() != MBBI)
? AArch64InstrInfo::isTailCallReturnInst(*MBBI)
: false;
int64_t ArgumentPopSize = 0;
if (IsTailCallReturn) {
MachineOperand &StackAdjust = MBBI->getOperand(1);
// For a tail-call in a callee-pops-arguments environment, some or all of
// the stack may actually be in use for the call's arguments; this is
// calculated during LowerCall and consumed here...
ArgumentPopSize = StackAdjust.getImm();
} else {
// ... otherwise the amount to pop is *all* of the argument space,
// conveniently stored in the MachineFunctionInfo by
// LowerFormalArguments. This will, of course, be zero for the C calling
// convention.
ArgumentPopSize = AFI->getArgumentStackToRestore();
}
return ArgumentPopSize;
}
static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
/// When Exit block is given, this check is for epilog.
bool AArch64FrameLowering::homogeneousPrologEpilog(
MachineFunction &MF, MachineBasicBlock *Exit) const {
if (!MF.getFunction().hasMinSize())
return false;
if (!EnableHomogeneousPrologEpilog)
return false;
if (EnableRedZone)
return false;
// TODO: Windows is not supported yet.
if (needsWinCFI(MF))
return false;
// TODO: SVE is not supported yet.
if (getSVEStackSize(MF))
return false;
// Bail on stack adjustment needed on return for simplicity.
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
return false;
if (Exit && getArgumentStackToRestore(MF, *Exit))
return false;
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
return false;
// If there are an odd number of GPRs before LR and FP in the CSRs list,
// they will not be paired into one RegPairInfo, which is incompatible with
// the assumption made by the homogeneous prolog epilog pass.
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
unsigned NumGPRs = 0;
for (unsigned I = 0; CSRegs[I]; ++I) {
Register Reg = CSRegs[I];
if (Reg == AArch64::LR) {
assert(CSRegs[I + 1] == AArch64::FP);
if (NumGPRs % 2 != 0)
return false;
break;
}
if (AArch64::GPR64RegClass.contains(Reg))
++NumGPRs;
}
return true;
}
/// Returns true if CSRs should be paired.
bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
}
/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exception here are vector stores/loads which cannot encode any
/// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
static const unsigned DefaultSafeSPDisplacement = 255;
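// (For reference: 255 is the top of the signed 9-bit unscaled LDUR/STUR
// offset range, [-256, 255] bytes.)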
/// Look at each instruction that references stack frames and return the stack
/// size limit beyond which some of these instructions will require a scratch
/// register during their expansion later.
static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
// FIXME: For now, just conservatively guesstimate based on unscaled indexing
// range. We'll end up allocating an unnecessary spill slot a lot, but
// realistically that's not a big deal at this stage of the game.
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
if (MI.isDebugInstr() || MI.isPseudo() ||
MI.getOpcode() == AArch64::ADDXri ||
MI.getOpcode() == AArch64::ADDSXri)
continue;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isFI())
continue;
StackOffset Offset;
if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
AArch64FrameOffsetCannotUpdate)
return 0;
}
}
}
return DefaultSafeSPDisplacement;
}
TargetStackID::Value
AArch64FrameLowering::getStackIDForScalableVectors() const {
return TargetStackID::ScalableVector;
}
/// Returns the size of the fixed object area (allocated next to sp on entry).
/// On Win64 this may include a var args area and an UnwindHelp object for EH.
static unsigned getFixedObjectSize(const MachineFunction &MF,
const AArch64FunctionInfo *AFI, bool IsWin64,
bool IsFunclet) {
if (!IsWin64 || IsFunclet) {
return AFI->getTailCallReservedStack();
} else {
if (AFI->getTailCallReservedStack() != 0 &&
!MF.getFunction().getAttributes().hasAttrSomewhere(
Attribute::SwiftAsync))
report_fatal_error("cannot generate ABI-changing tail call for Win64");
// Var args are stored here in the primary function.
const unsigned VarArgsArea = AFI->getVarArgsGPRSize();
// To support EH funclets we allocate an UnwindHelp object
const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
return AFI->getTailCallReservedStack() +
alignTo(VarArgsArea + UnwindHelpObject, 16);
}
}
/// Returns the size of the entire SVE stackframe (calleesaves + spills).
static StackOffset getSVEStackSize(const MachineFunction &MF) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
}
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
// Don't use the red zone if the function explicitly asks us not to.
// This is typically used for kernel code.
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const unsigned RedZoneSize =
Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
if (!RedZoneSize)
return false;
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
uint64_t NumBytes = AFI->getLocalStackSize();
// If neither NEON nor SVE is available, a COPY from one Q-reg to
// another requires a spill -> reload sequence. We can do that
// using a pre-decrementing store/post-decrementing load, but
// if we do so, we can't use the Red Zone.
bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
!Subtarget.isNeonAvailable() &&
!Subtarget.hasSVE();
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
getSVEStackSize(MF) || LowerQRegCopyThroughMem);
}
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
// Win64 EH requires a frame pointer if funclets are present, as the locals
// are accessed off the frame pointer in both the parent function and the
// funclets.
if (MF.hasEHFunclets())
return true;
// Retain behavior of always omitting the FP for leaf functions when possible.
if (MF.getTarget().Options.DisableFramePointerElim(MF))
return true;
if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
RegInfo->hasStackRealignment(MF))
return true;
// With large callframes around we may need to use FP to access the scavenging
// emergency spillslot.
//
// Unfortunately some calls to hasFP() like machine verifier ->
// getReservedReg() -> hasFP in the middle of global isel are too early
// to know the max call frame size. Hopefully conservatively returning "true"
// in those cases is fine.
// DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
if (!MFI.isMaxCallFrameSizeComputed() ||
MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
return true;
return false;
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool AArch64FrameLowering::hasReservedCallFrame(
const MachineFunction &MF) const {
// The stack probing code for the dynamically allocated outgoing arguments
// area assumes that the stack is probed at the top - either by the prologue
// code, which issues a probe if `hasVarSizedObjects` returns true, or by the
// most recent variable-sized object allocation. Changing the condition here
// may need to be followed up by changes to the probe issuing logic.
return !MF.getFrameInfo().hasVarSizedObjects();
}
MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const AArch64InstrInfo *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
const AArch64TargetLowering *TLI =
MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
[[maybe_unused]] MachineFrameInfo &MFI = MF.getFrameInfo();
DebugLoc DL = I->getDebugLoc();
unsigned Opc = I->getOpcode();
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
if (!hasReservedCallFrame(MF)) {
int64_t Amount = I->getOperand(0).getImm();
Amount = alignTo(Amount, getStackAlign());
if (!IsDestroy)
Amount = -Amount;
// N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
// doesn't have to pop anything), then the first operand will be zero too so
// this adjustment is a no-op.
if (CalleePopAmount == 0) {
// FIXME: in-function stack adjustment for calls is limited to 24-bits
// because there's no guaranteed temporary register available.
//
// ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
// 1) For offset <= 12-bit, we use LSL #0
// 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
// LSL #0, and the other uses LSL #12.
//
// Most call frames will be allocated at the start of a function so
// this is OK, but it is a limitation that needs dealing with.
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
if (TLI->hasInlineStackProbe(MF) &&
-Amount >= AArch64::StackProbeMaxUnprobedStack) {
// When stack probing is enabled, the decrement of SP may need to be
// probed. We only need to do this if the call site needs 1024 bytes of
// space or more, because a region smaller than that is allowed to be
// unprobed at an ABI boundary. We rely on the fact that SP has been
// probed exactly at this point, either by the prologue or most recent
// dynamic allocation.
assert(MFI.hasVarSizedObjects() &&
"non-reserved call frame without var sized objects?");
Register ScratchReg =
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
} else {
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(Amount), TII);
}
}
} else if (CalleePopAmount != 0) {
// If the calling convention demands that the callee pops arguments from the
// stack, we want to add it back if we have a reserved call frame.
assert(CalleePopAmount < 0xffffff && "call frame too large");
emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-(int64_t)CalleePopAmount), TII);
}
return MBB.erase(I);
}
void AArch64FrameLowering::emitCalleeSavedGPRLocations(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
SMEAttrs Attrs(MF.getFunction());
bool LocallyStreaming =
Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
for (const auto &Info : CSI) {
unsigned FrameIdx = Info.getFrameIdx();
if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
continue;
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();
// The location of VG will be emitted before each streaming-mode change in
// the function. Only locally-streaming functions require emitting the
// non-streaming VG location here.
if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) ||
(!LocallyStreaming &&
DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true)))
continue;
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
}
void AArch64FrameLowering::emitCalleeSavedSVELocations(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
for (const auto &Info : CSI) {
if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
continue;
// Not all unwinders may know about SVE registers, so assume the lowest
// common denominator.
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
unsigned Reg = Info.getReg();
if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
continue;
StackOffset Offset =
StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
unsigned CFIIndex = MF.addFrameInst(createCFAOffset(TRI, Reg, Offset));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
}
static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt,
unsigned DwarfReg) {
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, DwarfReg));
BuildMI(MBB, InsertPt, DebugLoc(), Desc).addCFIIndex(CFIIndex);
}
void AArch64FrameLowering::resetCFIToInitialState(
MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const auto &TRI =
static_cast<const AArch64RegisterInfo &>(*Subtarget.getRegisterInfo());
const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);
DebugLoc DL;
// Reset the CFA to `SP + 0`.
MachineBasicBlock::iterator InsertPt = MBB.begin();
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));
BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
// Flip the RA sign state.
if (MFI.shouldSignReturnAddress(MF)) {
CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
}
// Shadow call stack uses X18, reset it.
if (MFI.needsShadowCallStackPrologueEpilogue(MF))
insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
TRI.getDwarfRegNum(AArch64::X18, true));
// Emit .cfi_same_value for callee-saved registers.
const std::vector<CalleeSavedInfo> &CSI =
MF.getFrameInfo().getCalleeSavedInfo();
for (const auto &Info : CSI) {
unsigned Reg = Info.getReg();
if (!TRI.regNeedsCFI(Reg, Reg))
continue;
insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
TRI.getDwarfRegNum(Reg, true));
}
}
static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
bool SVE) {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
for (const auto &Info : CSI) {
if (SVE !=
(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
continue;
unsigned Reg = Info.getReg();
if (SVE &&
!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
continue;
if (!Info.isRestored())
continue;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
}
}
void AArch64FrameLowering::emitCalleeSavedGPRRestores(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
emitCalleeSavedRestores(MBB, MBBI, false);
}
void AArch64FrameLowering::emitCalleeSavedSVERestores(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
emitCalleeSavedRestores(MBB, MBBI, true);
}
// Return the maximum possible number of bytes for `Size` due to the
// architectural limit on the size of an SVE register.
static int64_t upperBound(StackOffset Size) {
static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
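// vscale is at most 16 (a 2048-bit maximum VL over the 128-bit minimum), so
// each scalable byte occupies at most 16 real bytes.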
return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
}
void AArch64FrameLowering::allocateStackSpace(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
bool FollowupAllocs) const {
if (!AllocSize)
return;
DebugLoc DL;
MachineFunction &MF = *MBB.getParent();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const int64_t MaxAlign = MFI.getMaxAlign().value();
const uint64_t AndMask = ~(MaxAlign - 1);
if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
Register TargetReg = RealignmentPadding
? findScratchNonCalleeSaveRegister(&MBB)
: AArch64::SP;
// SUB Xd/SP, SP, AllocSize
emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
EmitCFI, InitialOffset);
if (RealignmentPadding) {
// AND SP, X9, 0b11111...0000
BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
.addReg(TargetReg, RegState::Kill)
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
.setMIFlags(MachineInstr::FrameSetup);
AFI.setStackRealigned(true);
// No need for SEH instructions here; if we're realigning the stack,
// we've set a frame pointer and already finished the SEH prologue.
assert(!NeedsWinCFI);
}
return;
}
//
// Stack probing allocation.
//
// Fixed length allocation. If we don't need to re-align the stack and don't
// have SVE objects, we can use a more efficient sequence for stack probing.
if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
assert(ScratchReg != AArch64::NoRegister);
BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC))
.addDef(ScratchReg)
.addImm(AllocSize.getFixed())
.addImm(InitialOffset.getFixed())
.addImm(InitialOffset.getScalable());
// The fixed allocation may leave unprobed bytes at the top of the
// stack. If we have subsequent allocations (e.g. if we have variable-sized
// objects), we need to issue an extra probe, so these allocations start in
// a known state.
if (FollowupAllocs) {
// STR XZR, [SP]
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
.addReg(AArch64::XZR)
.addReg(AArch64::SP)
.addImm(0)
.setMIFlags(MachineInstr::FrameSetup);
}
return;
}
// Variable length allocation.
// If the (unknown) allocation size cannot exceed the probe size, decrement
// the stack pointer right away.
int64_t ProbeSize = AFI.getStackProbeSize();
if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
Register ScratchReg = RealignmentPadding
? findScratchNonCalleeSaveRegister(&MBB)
: AArch64::SP;
assert(ScratchReg != AArch64::NoRegister);
// SUB Xd, SP, AllocSize
emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
EmitCFI, InitialOffset);
if (RealignmentPadding) {
// AND SP, Xn, 0b11111...0000
BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
.addReg(ScratchReg, RegState::Kill)
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
.setMIFlags(MachineInstr::FrameSetup);
AFI.setStackRealigned(true);
}
if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
AArch64::StackProbeMaxUnprobedStack) {
// STR XZR, [SP]
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
.addReg(AArch64::XZR)
.addReg(AArch64::SP)
.addImm(0)
.setMIFlags(MachineInstr::FrameSetup);
}
return;
}
// Emit a variable-length allocation probing loop.
// TODO: As an optimisation, the loop can be "unrolled" into a few parts,
// each of them guaranteed to adjust the stack by less than the probe size.
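// Roughly, the PROBED_STACKALLOC_VAR pseudo built below expands to a loop
// of the following shape (a sketch; labels and the final probe are
// illustrative, not the exact expansion):
//   Loop:
//     SUB SP, SP, #ProbeSize
//     CMP SP, TargetReg
//     B.LE Exit
//     STR XZR, [SP]
//     B   Loop
//   Exit:
//     MOV SP, TargetReg
//     STR XZR, [SP]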
Register TargetReg = findScratchNonCalleeSaveRegister(&MBB);
assert(TargetReg != AArch64::NoRegister);
// SUB Xd, SP, AllocSize
emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
EmitCFI, InitialOffset);
if (RealignmentPadding) {
// AND Xn, Xn, 0b11111...0000
BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), TargetReg)
.addReg(TargetReg, RegState::Kill)
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
.setMIFlags(MachineInstr::FrameSetup);
}
BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR))
.addReg(TargetReg);
if (EmitCFI) {
// Set the CFA register back to SP.
unsigned Reg =
Subtarget.getRegisterInfo()->getDwarfRegNum(AArch64::SP, true);
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
if (RealignmentPadding)
AFI.setStackRealigned(true);
}
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
switch (Reg.id()) {
default:
// The called routine is expected to preserve r19-r28
// r29 and r30 are used as the frame pointer and link register, respectively.
return 0;
// GPRs
#define CASE(n) \
case AArch64::W##n: \
case AArch64::X##n: \
return AArch64::X##n
CASE(0);
CASE(1);
CASE(2);
CASE(3);
CASE(4);
CASE(5);
CASE(6);
CASE(7);
CASE(8);
CASE(9);
CASE(10);
CASE(11);
CASE(12);
CASE(13);
CASE(14);
CASE(15);
CASE(16);
CASE(17);
CASE(18);
#undef CASE
// FPRs
#define CASE(n) \
case AArch64::B##n: \
case AArch64::H##n: \
case AArch64::S##n: \
case AArch64::D##n: \
case AArch64::Q##n: \
return HasSVE ? AArch64::Z##n : AArch64::Q##n
CASE(0);
CASE(1);
CASE(2);
CASE(3);
CASE(4);
CASE(5);
CASE(6);
CASE(7);
CASE(8);
CASE(9);
CASE(10);
CASE(11);
CASE(12);
CASE(13);
CASE(14);
CASE(15);
CASE(16);
CASE(17);
CASE(18);
CASE(19);
CASE(20);
CASE(21);
CASE(22);
CASE(23);
CASE(24);
CASE(25);
CASE(26);
CASE(27);
CASE(28);
CASE(29);
CASE(30);
CASE(31);
#undef CASE
}
}
void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
MachineBasicBlock &MBB) const {
// Insertion point.
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
// Fake a debug loc.
DebugLoc DL;
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
const MachineFunction &MF = *MBB.getParent();
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
BitVector GPRsToZero(TRI.getNumRegs());
BitVector FPRsToZero(TRI.getNumRegs());
bool HasSVE = STI.hasSVE();
for (MCRegister Reg : RegsToZero.set_bits()) {
if (TRI.isGeneralPurposeRegister(MF, Reg)) {
// For GPRs, we only care to clear out the 64-bit register.
if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
GPRsToZero.set(XReg);
} else if (AArch64InstrInfo::isFpOrNEON(Reg)) {
// For FPRs, clear the full vector register (Z when SVE is available,
// Q otherwise).
if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
FPRsToZero.set(XReg);
}
}
const AArch64InstrInfo &TII = *STI.getInstrInfo();
// Zero out GPRs.
for (MCRegister Reg : GPRsToZero.set_bits())
TII.buildClearRegister(Reg, MBB, MBBI, DL);
// Zero out FP/vector registers.
for (MCRegister Reg : FPRsToZero.set_bits())
TII.buildClearRegister(Reg, MBB, MBBI, DL);
if (HasSVE) {
for (MCRegister PReg :
{AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
AArch64::P15}) {
if (RegsToZero[PReg])
BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg);
}
}
}
static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
const MachineBasicBlock &MBB) {
const MachineFunction *MF = MBB.getParent();
LiveRegs.addLiveIns(MBB);
// Mark callee saved registers as used so we will not choose them.
const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
}
// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
MachineFunction *MF = MBB->getParent();
// If MBB is an entry block, use X9 as the scratch register, unless the
// function uses the preserve_none calling convention: preserve_none
// functions may be using X9 to pass arguments, so in that case prefer to
// pick an available register below.
if (&MF->front() == MBB &&
MF->getFunction().getCallingConv() != CallingConv::PreserveNone)
return AArch64::X9;
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
LivePhysRegs LiveRegs(TRI);
getLiveRegsForEntryMBB(LiveRegs, *MBB);
// Prefer X9 since it was historically used for the prologue scratch reg.
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (LiveRegs.available(MRI, AArch64::X9))
return AArch64::X9;
for (unsigned Reg : AArch64::GPR64RegClass) {
if (LiveRegs.available(MRI, Reg))
return Reg;
}
return AArch64::NoRegister;
}
bool AArch64FrameLowering::canUseAsPrologue(
const MachineBasicBlock &MBB) const {
const MachineFunction *MF = MBB.getParent();
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
if (AFI->hasSwiftAsyncContext()) {
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
const MachineRegisterInfo &MRI = MF->getRegInfo();
LivePhysRegs LiveRegs(TRI);
getLiveRegsForEntryMBB(LiveRegs, MBB);
// The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are
// available.
if (!LiveRegs.available(MRI, AArch64::X16) ||
!LiveRegs.available(MRI, AArch64::X17))
return false;
}
// Certain stack probing sequences might clobber flags, so we can't use
// the block as a prologue if the flags register is a live-in.
if (MF->getInfo<AArch64FunctionInfo>()->hasStackProbing() &&
MBB.isLiveIn(AArch64::NZCV))
return false;
// Don't need a scratch register if we're not going to re-align the stack or
// emit stack probes.
if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
return true;
// Otherwise, we can use any block as long as it has a scratch register
// available.
return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}
static bool windowsRequiresStackProbe(MachineFunction &MF,
uint64_t StackSizeInBytes) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
// TODO: When implementing stack protectors, take that into account
// for the probe threshold.
return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
}
static bool needsWinCFI(const MachineFunction &MF) {
const Function &F = MF.getFunction();
return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
F.needsUnwindTableEntry();
}
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
MachineFunction &MF, uint64_t StackBumpBytes) const {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (homogeneousPrologEpilog(MF))
return false;
if (AFI->getLocalStackSize() == 0)
return false;
// For WinCFI, if optimizing for size, prefer to not combine the stack bump
// (to force a stp with predecrement) to match the packed unwind format,
// provided that there actually are any callee saved registers to merge the
// decrement with.
// This is potentially marginally slower, but allows using the packed
// unwind format for functions that both have a local area and callee saved
// registers. Using the packed unwind format notably reduces the size of
// the unwind info.
if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
MF.getFunction().hasOptSize())
return false;
// 512 is the maximum immediate for stp/ldp that will be used for
// callee-save save/restores
if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
return false;
if (MFI.hasVarSizedObjects())
return false;
if (RegInfo->hasStackRealignment(MF))
return false;
// This isn't strictly necessary, but it simplifies things a bit since the
// current RedZone handling code assumes the SP is adjusted by the
// callee-save save/restore code.
if (canUseRedZone(MF))
return false;
// When there is an SVE area on the stack, always allocate the
// callee-saves and spills/locals separately.
if (getSVEStackSize(MF))
return false;
return true;
}
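// Illustrative example of the combined bump decided above (assumed sizes,
// not taken from the source): with 16 bytes of locals and an {fp, lr}
// callee-save pair, combining turns
//   stp x29, x30, [sp, #-16]!
//   sub sp, sp, #16
// into
//   sub sp, sp, #32
//   stp x29, x30, [sp, #16]
// with the callee-save offsets fixed up by the local stack size.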
bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
return false;
if (MBB.empty())
return true;
// Disable combined SP bump if the last instruction is an MTE tag store. It
// is almost always better to merge SP adjustment into those instructions.
MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastI != Begin) {
--LastI;
if (LastI->isTransient())
continue;
if (!LastI->getFlag(MachineInstr::FrameDestroy))
break;
}
switch (LastI->getOpcode()) {
case AArch64::STGloop:
case AArch64::STZGloop:
case AArch64::STGi:
case AArch64::STZGi:
case AArch64::ST2Gi:
case AArch64::STZ2Gi:
return false;
default:
return true;
}
llvm_unreachable("unreachable");
}
// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
const TargetInstrInfo &TII,
MachineInstr::MIFlag Flag) {
unsigned Opc = MBBI->getOpcode();
MachineBasicBlock *MBB = MBBI->getParent();
MachineFunction &MF = *MBB->getParent();
DebugLoc DL = MBBI->getDebugLoc();
unsigned ImmIdx = MBBI->getNumOperands() - 1;
int Imm = MBBI->getOperand(ImmIdx).getImm();
MachineInstrBuilder MIB;
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
switch (Opc) {
default:
llvm_unreachable("No SEH Opcode for this instruction");
case AArch64::LDPDpost:
Imm = -Imm;
[[fallthrough]];
case AArch64::STPDpre: {
unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
.addImm(Reg0)
.addImm(Reg1)
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::LDPXpost:
Imm = -Imm;
[[fallthrough]];
case AArch64::STPXpre: {
Register Reg0 = MBBI->getOperand(1).getReg();
Register Reg1 = MBBI->getOperand(2).getReg();
if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
.addImm(Imm * 8)
.setMIFlag(Flag);
else
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
.addImm(RegInfo->getSEHRegNum(Reg0))
.addImm(RegInfo->getSEHRegNum(Reg1))
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::LDRDpost:
Imm = -Imm;
[[fallthrough]];
case AArch64::STRDpre: {
unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
.addImm(Reg)
.addImm(Imm)
.setMIFlag(Flag);
break;
}
case AArch64::LDRXpost:
Imm = -Imm;
[[fallthrough]];
case AArch64::STRXpre: {
unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
.addImm(Reg)
.addImm(Imm)
.setMIFlag(Flag);
break;
}
case AArch64::STPDi:
case AArch64::LDPDi: {
unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
.addImm(Reg0)
.addImm(Reg1)
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::STPXi:
case AArch64::LDPXi: {
Register Reg0 = MBBI->getOperand(0).getReg();
Register Reg1 = MBBI->getOperand(1).getReg();
if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
.addImm(Imm * 8)
.setMIFlag(Flag);
else
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
.addImm(RegInfo->getSEHRegNum(Reg0))
.addImm(RegInfo->getSEHRegNum(Reg1))
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::STRXui:
case AArch64::LDRXui: {
int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
.addImm(Reg)
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::STRDui:
case AArch64::LDRDui: {
unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
.addImm(Reg)
.addImm(Imm * 8)
.setMIFlag(Flag);
break;
}
case AArch64::STPQi:
case AArch64::LDPQi: {
unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP))
.addImm(Reg0)
.addImm(Reg1)
.addImm(Imm * 16)
.setMIFlag(Flag);
break;
}
case AArch64::LDPQpost:
Imm = -Imm;
[[fallthrough]];
case AArch64::STPQpre: {
unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX))
.addImm(Reg0)
.addImm(Reg1)
.addImm(Imm * 16)
.setMIFlag(Flag);
break;
}
}
auto I = MBB->insertAfter(MBBI, MIB);
return I;
}
// Fix up the SEH opcode associated with the save/restore instruction.
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
unsigned LocalStackSize) {
MachineOperand *ImmOpnd = nullptr;
unsigned ImmIdx = MBBI->getNumOperands() - 1;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Fix the offset in the SEH instruction");
case AArch64::SEH_SaveFPLR:
case AArch64::SEH_SaveRegP:
case AArch64::SEH_SaveReg:
case AArch64::SEH_SaveFRegP:
case AArch64::SEH_SaveFReg:
case AArch64::SEH_SaveAnyRegQP:
case AArch64::SEH_SaveAnyRegQPX:
ImmOpnd = &MBBI->getOperand(ImmIdx);
break;
}
if (ImmOpnd)
ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}
bool requiresGetVGCall(MachineFunction &MF) {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return AFI->hasStreamingModeChanges() &&
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
}
bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
unsigned Opc = MBBI->getOpcode();
if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
Opc == AArch64::UBFMXri)
return true;
if (requiresGetVGCall(*MBBI->getMF())) {
if (Opc == AArch64::ORRXrr)
return true;
if (Opc == AArch64::BL) {
auto Op1 = MBBI->getOperand(0);
return Op1.isSymbol() &&
(StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
}
}
return false;
}
// Convert a callee-save register save/restore instruction into one that also
// decrements/increments the stack pointer, allocating/deallocating the
// callee-save stack area, by switching the store/load to its pre/post
// increment version.
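// Illustrative example (assumed operands): with a CSStackSizeInc of -16,
//   stp x29, x30, [sp]
// becomes
//   stp x29, x30, [sp, #-16]!
// folding the callee-save area allocation into the first store.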
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
int CFAOffset = 0) {
unsigned NewOpc;
// If the function contains streaming mode changes, we expect instructions
// to calculate the value of VG before spilling. For locally-streaming
// functions, we need to do this for both the streaming and non-streaming
// vector length. Move past these instructions if necessary.
MachineFunction &MF = *MBB.getParent();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (AFI->hasStreamingModeChanges())
while (isVGInstruction(MBBI))
++MBBI;
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
case AArch64::STPXi:
NewOpc = AArch64::STPXpre;
break;
case AArch64::STPDi:
NewOpc = AArch64::STPDpre;
break;
case AArch64::STPQi:
NewOpc = AArch64::STPQpre;
break;
case AArch64::STRXui:
NewOpc = AArch64::STRXpre;
break;
case AArch64::STRDui:
NewOpc = AArch64::STRDpre;
break;
case AArch64::STRQui:
NewOpc = AArch64::STRQpre;
break;
case AArch64::LDPXi:
NewOpc = AArch64::LDPXpost;
break;
case AArch64::LDPDi:
NewOpc = AArch64::LDPDpost;
break;
case AArch64::LDPQi:
NewOpc = AArch64::LDPQpost;
break;
case AArch64::LDRXui:
NewOpc = AArch64::LDRXpost;
break;
case AArch64::LDRDui:
NewOpc = AArch64::LDRDpost;
break;
case AArch64::LDRQui:
NewOpc = AArch64::LDRQpost;
break;
}
// Get rid of the SEH code associated with the old instruction.
if (NeedsWinCFI) {
auto SEH = std::next(MBBI);
if (AArch64InstrInfo::isSEHInstruction(*SEH))
SEH->eraseFromParent();
}
TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
int64_t MinOffset, MaxOffset;
bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
NewOpc, Scale, Width, MinOffset, MaxOffset);
(void)Success;
assert(Success && "unknown load/store opcode");
// If the first store isn't right where we want SP then we can't fold the
// update in, so create a normal arithmetic instruction instead.
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {
// If we are destroying the frame, make sure we add the increment after the
// last frame operation.
if (FrameFlag == MachineInstr::FrameDestroy)
++MBBI;
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
false, false, nullptr, EmitCFI,
StackOffset::getFixed(CFAOffset));
return std::prev(MBBI);
}
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
MIB.addReg(AArch64::SP, RegState::Define);
// Copy all operands other than the immediate offset.
unsigned OpndIdx = 0;
for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
++OpndIdx)
MIB.add(MBBI->getOperand(OpndIdx));
assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
"Unexpected immediate offset in first/last callee-save save/restore "
"instruction!");
assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
assert(CSStackSizeInc % Scale == 0);
MIB.addImm(CSStackSizeInc / (int)Scale);
MIB.setMIFlags(MBBI->getFlags());
MIB.setMemRefs(MBBI->memoperands());
// Generate a new SEH code that corresponds to the new instruction.
if (NeedsWinCFI) {
*HasWinCFI = true;
InsertSEH(*MIB, *TII, FrameFlag);
}
if (EmitCFI) {
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset - CSStackSizeInc));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(FrameFlag);
}
return std::prev(MBB.erase(MBBI));
}
// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
uint64_t LocalStackSize,
bool NeedsWinCFI,
bool *HasWinCFI) {
if (AArch64InstrInfo::isSEHInstruction(MI))
return;
unsigned Opc = MI.getOpcode();
unsigned Scale;
switch (Opc) {
case AArch64::STPXi:
case AArch64::STRXui:
case AArch64::STPDi:
case AArch64::STRDui:
case AArch64::LDPXi:
case AArch64::LDRXui:
case AArch64::LDPDi:
case AArch64::LDRDui:
Scale = 8;
break;
case AArch64::STPQi:
case AArch64::STRQui:
case AArch64::LDPQi:
case AArch64::LDRQui:
Scale = 16;
break;
default:
llvm_unreachable("Unexpected callee-save save/restore opcode!");
}
unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
"Unexpected base register in callee-save save/restore instruction!");
// Last operand is immediate offset that needs fixing.
MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
// All generated opcodes have scaled offsets.
assert(LocalStackSize % Scale == 0);
OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
if (NeedsWinCFI) {
*HasWinCFI = true;
auto MBBI = std::next(MachineBasicBlock::iterator(MI));
assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
"Expecting a SEH instruction");
fixupSEHOpcode(MBBI, LocalStackSize);
}
}
static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
// Convenience function to determine whether I is an SVE callee save.
static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
case AArch64::PTRUE_C_B:
case AArch64::LD1B_2Z_IMM:
case AArch64::ST1B_2Z_IMM:
case AArch64::STR_ZXI:
case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
case AArch64::LDR_PXI:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
}
}
static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, bool NeedsWinCFI,
bool NeedsUnwindInfo) {
// Shadow call stack prolog: str x30, [x18], #8
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost))
.addReg(AArch64::X18, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::X18)
.addImm(8)
.setMIFlag(MachineInstr::FrameSetup);
// This instruction also makes x18 live-in to the entry block.
MBB.addLiveIn(AArch64::X18);
if (NeedsWinCFI)
BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsUnwindInfo) {
// Emit a CFI instruction that causes 8 to be subtracted from the value of
// x18 when unwinding past this frame.
static const char CFIInst[] = {
dwarf::DW_CFA_val_expression,
18, // register
2, // length
static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
static_cast<char>(-8) & 0x7f, // addend (sleb128)
};
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
nullptr, StringRef(CFIInst, sizeof(CFIInst))));
BuildMI(MBB, MBBI, DL, TII.get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlag(MachineInstr::FrameSetup);
}
}
static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL) {
// Shadow call stack epilog: ldr x30, [x18, #-8]!
BuildMI(MBB, MBBI, DL, TII.get(AArch64::LDRXpre))
.addReg(AArch64::X18, RegState::Define)
.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::X18)
.addImm(-8)
.setMIFlag(MachineInstr::FrameDestroy);
if (MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF)) {
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, 18));
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
}
}
// Define the current CFA rule to use the provided FP.
static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, unsigned FixedObject) {
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *TRI = STI.getRegisterInfo();
const TargetInstrInfo *TII = STI.getInstrInfo();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
const int OffsetToFirstCalleeSaveFromFP =
AFI->getCalleeSaveBaseToFrameRecordOffset() -
AFI->getCalleeSavedStackSize();
Register FramePtr = TRI->getFrameRegister(MF);
unsigned Reg = TRI->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
#ifndef NDEBUG
/// Collect live registers from the end of \p MI's parent up to (including) \p
/// MI in \p LiveRegs.
static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
LivePhysRegs &LiveRegs) {
MachineBasicBlock &MBB = *MI.getParent();
LiveRegs.addLiveOuts(MBB);
for (const MachineInstr &MI :
reverse(make_range(MI.getIterator(), MBB.instr_end())))
LiveRegs.stepBackward(MI);
}
#endif
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const Function &F = MF.getFunction();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
bool HasFP = hasFP(MF);
bool NeedsWinCFI = needsWinCFI(MF);
bool HasWinCFI = false;
auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
MachineBasicBlock::iterator End = MBB.end();
#ifndef NDEBUG
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
// Collect live registers from the end of MBB up to the start of the
// existing frame setup instructions.
MachineBasicBlock::iterator NonFrameStart = MBB.begin();
while (NonFrameStart != End &&
NonFrameStart->getFlag(MachineInstr::FrameSetup))
++NonFrameStart;
LivePhysRegs LiveRegs(*TRI);
if (NonFrameStart != MBB.end()) {
getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs);
// Ignore registers used for stack management for now.
LiveRegs.removeReg(AArch64::SP);
LiveRegs.removeReg(AArch64::X19);
LiveRegs.removeReg(AArch64::FP);
LiveRegs.removeReg(AArch64::LR);
// X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
// This is necessary to spill VG if required where SVE is unavailable, but
// X0 is preserved around this call.
if (requiresGetVGCall(MF))
LiveRegs.removeReg(AArch64::X0);
}
auto VerifyClobberOnExit = make_scope_exit([&]() {
if (NonFrameStart == MBB.end())
return;
// Check if any of the newly inserted instructions clobber any of the live
// registers.
for (MachineInstr &MI :
make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
for (auto &Op : MI.operands())
if (Op.isReg() && Op.isDef())
assert(!LiveRegs.contains(Op.getReg()) &&
"live register clobbered by inserted prologue instructions");
}
});
#endif
bool IsFunclet = MBB.isEHFuncletEntry();
// At this point, we're going to decide whether or not the function uses a
// redzone. In most cases the function doesn't have one, so start from false
// and set it to true only when a redzone is actually used.
AFI->setHasRedZone(false);
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc DL;
const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
MFnI.needsDwarfUnwindInfo(MF));
if (MFnI.shouldSignReturnAddress(MF)) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsWinCFI)
HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
}
if (EmitCFI && MFnI.isMTETagged()) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
.setMIFlag(MachineInstr::FrameSetup);
}
// We signal the presence of a Swift extended frame to external tools by
// storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
// ORR is sufficient; it is assumed a Swift kernel would initialize the TBI
// bits so that this still holds.
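// (For reference, a decoding note rather than source material: the ORRXri
// immediate 0x1100 used below is the logical-immediate encoding of
// 0x1000000000000000, i.e. only bit 60 set.)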
if (HasFP && AFI->hasSwiftAsyncContext()) {
switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
case SwiftAsyncFramePointerMode::DeploymentBased:
if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
// The special symbol below is absolute and has a *value* that can be
// combined with the frame pointer to signal an extended frame.
BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
.addExternalSymbol("swift_async_extendedFramePointerFlags",
AArch64II::MO_GOT);
if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlags(MachineInstr::FrameSetup);
HasWinCFI = true;
}
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
.addUse(AArch64::FP)
.addUse(AArch64::X16)
.addImm(Subtarget.isTargetILP32() ? 32 : 0);
if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlags(MachineInstr::FrameSetup);
HasWinCFI = true;
}
break;
}
[[fallthrough]];
case SwiftAsyncFramePointerMode::Always:
// ORR x29, x29, #0x1000_0000_0000_0000
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
.addUse(AArch64::FP)
.addImm(0x1100)
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlags(MachineInstr::FrameSetup);
HasWinCFI = true;
}
break;
case SwiftAsyncFramePointerMode::Never:
break;
}
}
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
// Set tagged base pointer to the requested stack slot.
// Ideally it should match SP value after prologue.
std::optional<int> TBPI = AFI->getTaggedBasePointerIndex();
if (TBPI)
AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
else
AFI->setTaggedBasePointerOffset(MFI.getStackSize());
const StackOffset &SVEStackSize = getSVEStackSize(MF);
// getStackSize() includes all the locals in its size calculation. We don't
// include these locals when computing the stack size of a funclet, as they
// are allocated in the parent's stack frame and accessed via the frame
// pointer from the funclet. We only save the callee saved registers in the
// funclet, which are really the callee saved registers of the parent
// function, including the funclet.
int64_t NumBytes =
IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
assert(!SVEStackSize &&
"unexpected function without stack frame but with SVE objects");
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
if (!NumBytes)
return;
// REDZONE: If the stack size is less than 128 bytes, we don't need
// to actually allocate.
if (canUseRedZone(MF)) {
AFI->setHasRedZone(true);
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (EmitCFI) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
// Encode the stack size of the leaf function.
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
}
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
}
return;
}
bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
bool HomPrologEpilog = homogeneousPrologEpilog(MF);
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
EmitAsyncCFI);
NumBytes = 0;
} else if (HomPrologEpilog) {
// Stack has already been adjusted.
NumBytes -= PrologueSaveSize;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
EmitAsyncCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
// Move past the saves of the callee-saved registers, fixing up the offsets
// and pre-inc if we decided to combine the callee-save and local stack
// pointer bump above.
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
!IsSVECalleeSave(MBBI)) {
// Move past instructions generated to calculate VG
if (AFI->hasStreamingModeChanges())
while (isVGInstruction(MBBI))
++MBBI;
if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
++MBBI;
}
// For funclets the FP belongs to the containing function.
if (!IsFunclet && HasFP) {
// Only set up FP if we actually need to.
int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
if (AFI->hasSwiftAsyncContext()) {
// Before we update the live FP we have to ensure there's a valid (or
// null) asynchronous context in its slot just before FP in the frame
// record, so store it now.
const auto &Attrs = MF.getFunction().getAttributes();
bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
if (HaveInitialContext)
MBB.addLiveIn(AArch64::X22);
Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
.addUse(Reg)
.addUse(AArch64::SP)
.addImm(FPOffset - 8)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
// WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
// to multiple instructions, should be mutually-exclusive.
assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlags(MachineInstr::FrameSetup);
HasWinCFI = true;
}
}
if (HomPrologEpilog) {
auto Prolog = MBBI;
--Prolog;
assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
} else {
// Issue sub fp, sp, FPOffset or
// mov fp,sp when FPOffset is zero.
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
StackOffset::getFixed(FPOffset), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (NeedsWinCFI && HasWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
// After setting up the FP, the rest of the prolog doesn't need to be
// included in the SEH unwind info.
NeedsWinCFI = false;
}
}
if (EmitAsyncCFI)
emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
}
// Now emit the moves for whatever callee saved regs we have (including FP,
// LR if those are saved). Frame instructions for SVE registers are emitted
// later, after the instructions which actually save the SVE regs.
if (EmitAsyncCFI)
emitCalleeSavedGPRLocations(MBB, MBBI);
// Alignment is required for the parent frame, not the funclet
const bool NeedsRealignment =
NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
const int64_t RealignmentPadding =
(NeedsRealignment && MFI.getMaxAlign() > Align(16))
? MFI.getMaxAlign().value() - 16
: 0;
if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
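// The Windows stack probe helper (__chkstk) expects the allocation size in
// 16-byte units in x15, hence the conversion from bytes below.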
uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
if (NeedsWinCFI) {
HasWinCFI = true;
// alloc_l can hold at most 256MB, so assume that NumBytes doesn't
// exceed this amount. We need to move at most 2^24 - 1 into x15.
// This is at most two instructions, MOVZ followed by MOVK.
// TODO: Fix to use multiple stack alloc unwind codes for stacks
// exceeding 256MB in size.
if (NumBytes >= (1 << 28))
report_fatal_error("Stack size cannot exceed 256MB for stack "
"unwinding purposes");
uint32_t LowNumWords = NumWords & 0xFFFF;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
.addImm(LowNumWords)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
if ((NumWords & 0xFFFF0000) != 0) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
.addReg(AArch64::X15)
.addImm((NumWords & 0xFFFF0000) >> 16) // High half
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
} else {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
.addImm(NumWords)
.setMIFlags(MachineInstr::FrameSetup);
}
const char *ChkStk = Subtarget.getChkStkName();
switch (MF.getTarget().getCodeModel()) {
case CodeModel::Tiny:
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
.addExternalSymbol(ChkStk)
.addReg(AArch64::X15, RegState::Implicit)
.addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
break;
case CodeModel::Large:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
.addReg(AArch64::X16, RegState::Define)
.addExternalSymbol(ChkStk)
.addExternalSymbol(ChkStk)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
.addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
break;
}
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
.addReg(AArch64::SP, RegState::Kill)
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
.setMIFlags(MachineInstr::FrameSetup);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
.addImm(NumBytes)
.setMIFlag(MachineInstr::FrameSetup);
}
NumBytes = 0;
if (RealignmentPadding > 0) {
if (RealignmentPadding >= 4096) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
.addReg(AArch64::X16, RegState::Define)
.addImm(RealignmentPadding)
.setMIFlags(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
.addReg(AArch64::SP)
.addReg(AArch64::X16, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
.setMIFlag(MachineInstr::FrameSetup);
} else {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
.addReg(AArch64::SP)
.addImm(RealignmentPadding)
.addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
}
uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
AFI->setStackRealigned(true);
// No need for SEH instructions here; if we're realigning the stack,
// we've set a frame pointer and already finished the SEH prologue.
assert(!NeedsWinCFI);
}
}
StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
// Process the SVE callee-saves to determine what space needs to be
// allocated.
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
<< "\n");
// Find callee save instructions in frame.
CalleeSavesBegin = MBBI;
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
++MBBI;
CalleeSavesEnd = MBBI;
SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
}
// Allocate space for the callee saves (if any).
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || LocalsSize);
CFAOffset += SVECalleeSavesSize;
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
// Allocate space for the rest of the frame including SVE locals. Align the
// stack as necessary.
assert(!(canUseRedZone(MF) && NeedsRealignment) &&
"Cannot use redzone with stack realignment");
if (!canUseRedZone(MF)) {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
SVELocalsSize + StackOffset::getFixed(NumBytes),
NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
CFAOffset, MFI.hasVarSizedObjects());
}
// If we need a base pointer, set it up here. It's whatever the value of the
// stack pointer is at this point. Any variable size objects will be allocated
// after this, so we can still use the base pointer to reference locals.
//
// FIXME: Clarify FrameSetup flags here.
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// needed.
// For funclets the BP belongs to the containing function.
if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
false);
if (NeedsWinCFI) {
HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
}
}
// The very last FrameSetup instruction indicates the end of the prologue.
// Emit a SEH opcode indicating the prologue end.
if (NeedsWinCFI && HasWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
}
// SEH funclets are passed the frame pointer in X1. If the parent
// function uses the base register, then the base register is used
// directly, and is not retrieved from X1.
if (IsFunclet && F.hasPersonalityFn()) {
EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
if (isAsynchronousEHPersonality(Per)) {
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
.addReg(AArch64::X1)
.setMIFlag(MachineInstr::FrameSetup);
MBB.addLiveIn(AArch64::X1);
}
}
if (EmitCFI && !EmitAsyncCFI) {
if (HasFP) {
emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
} else {
StackOffset TotalSize =
SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
unsigned CFIIndex = MF.addFrameInst(createDefCFA(
*RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP, TotalSize,
/*LastAdjustmentWasScalable=*/false));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
emitCalleeSavedGPRLocations(MBB, MBBI);
emitCalleeSavedSVELocations(MBB, MBBI);
}
}
static bool isFuncletReturnInstr(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::CATCHRET:
case AArch64::CLEANUPRET:
return true;
}
}
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool NeedsWinCFI = needsWinCFI(MF);
bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
bool HasWinCFI = false;
bool IsFunclet = false;
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
IsFunclet = isFuncletReturnInstr(*MBBI);
}
MachineBasicBlock::iterator EpilogStartI = MBB.end();
auto FinishingTouches = make_scope_exit([&]() {
if (AFI->shouldSignReturnAddress(MF)) {
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::PAUTH_EPILOGUE))
.setMIFlag(MachineInstr::FrameDestroy);
if (NeedsWinCFI)
HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
}
if (AFI->needsShadowCallStackPrologueEpilogue(MF))
emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
if (EmitCFI)
emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
if (HasWinCFI) {
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
if (!MF.hasWinCFI())
MF.setHasWinCFI(true);
}
if (NeedsWinCFI) {
assert(EpilogStartI != MBB.end());
if (!HasWinCFI)
MBB.erase(EpilogStartI);
}
});
int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
: MFI.getStackSize();
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
// How much of the stack used by incoming arguments this function is expected
// to restore in this particular epilogue.
int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
MF.getFunction().isVarArg());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
int64_t AfterCSRPopSize = ArgumentStackToRestore;
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// We cannot rely on the local stack size set in emitPrologue if the function
// has funclets, as funclets have different local stack size requirements, and
// the current value set in emitPrologue may be that of the containing
// function.
if (MF.hasEHFunclets())
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
if (homogeneousPrologEpilog(MF, &MBB)) {
assert(!NeedsWinCFI);
auto LastPopI = MBB.getFirstTerminator();
if (LastPopI != MBB.begin()) {
auto HomogeneousEpilog = std::prev(LastPopI);
if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
LastPopI = HomogeneousEpilog;
}
// Adjust local stack
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AFI->getLocalStackSize()), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
// SP has already been adjusted while restoring the callee save regs.
// We've already bailed out earlier of the case that adjusts SP for arguments.
assert(AfterCSRPopSize == 0);
return;
}
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
bool CombineAfterCSRBump = false;
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
AArch64InstrInfo::isSEHInstruction(*Pop))
Pop = std::prev(Pop);
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
// If the offset is 0 and the AfterCSR pop is not actually trying to
// allocate more stack for arguments (in space that an untimely interrupt
// may clobber), convert it to a post-index ldp.
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
MachineInstr::FrameDestroy, PrologueSaveSize);
} else {
// If not, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
// adjustment *before* the CSR pops to the adjustment *after* the CSR
// pops.
AfterCSRPopSize += PrologueSaveSize;
CombineAfterCSRBump = true;
}
}
// Move past the restores of the callee-saved registers.
// If we plan on combining the sp bump of the local stack size and the callee
// save stack size, we might need to adjust the CSR save and restore offsets.
MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastPopI != Begin) {
--LastPopI;
if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
IsSVECalleeSave(LastPopI)) {
++LastPopI;
break;
} else if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
}
if (NeedsWinCFI) {
// Note that there are cases where we insert SEH opcodes in the
// epilogue when we had no SEH opcodes in the prologue. For
// example, when there is no stack frame but there are stack
// arguments. Insert the SEH_EpilogStart and remove it later if we
// didn't emit any SEH opcodes, to avoid generating WinCFI for
// functions that don't need it.
BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
.setMIFlag(MachineInstr::FrameDestroy);
EpilogStartI = LastPopI;
--EpilogStartI;
}
if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
case SwiftAsyncFramePointerMode::DeploymentBased:
// Avoid the reload as it is GOT relative, and instead fall back to the
// hardcoded value below. This allows a mismatch between the OS and
// application without immediately terminating on the difference.
[[fallthrough]];
case SwiftAsyncFramePointerMode::Always:
// We need to reset FP to its untagged state on return. Bit 60 is
// currently used to show the presence of an extended frame.
// BIC x29, x29, #0x1000_0000_0000_0000
BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
AArch64::FP)
.addUse(AArch64::FP)
.addImm(0x10fe)
.setMIFlag(MachineInstr::FrameDestroy);
if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlags(MachineInstr::FrameDestroy);
HasWinCFI = true;
}
break;
case SwiftAsyncFramePointerMode::Never:
break;
}
}
const StackOffset &SVEStackSize = getSVEStackSize(MF);
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
// When we are about to restore the CSRs, the CFA register is SP again.
if (EmitCFI && hasFP(MF)) {
const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, NumBytes));
BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
}
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
&HasWinCFI, EmitCFI, StackOffset::getFixed(NumBytes));
return;
}
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
// Process the SVE callee-saves to determine what space needs to be
// deallocated.
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
RestoreBegin = std::prev(RestoreEnd);
while (RestoreBegin != MBB.begin() &&
IsSVECalleeSave(std::prev(RestoreBegin)))
--RestoreBegin;
assert(IsSVECalleeSave(RestoreBegin) &&
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
StackOffset CalleeSavedSizeAsOffset =
StackOffset::getScalable(CalleeSavedSize);
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
DeallocateAfter = CalleeSavedSizeAsOffset;
}
// Deallocate the SVE area.
if (SVEStackSize) {
// If we have stack realignment or variable sized objects on the stack,
// restore the stack pointer from the frame pointer prior to SVE CSR
// restoration.
if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
// Set SP to start of SVE callee-save area from which they can
// be reloaded. The code below will deallocate the stack space
// by moving FP -> SP.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
StackOffset::getScalable(-CalleeSavedSize), TII,
MachineInstr::FrameDestroy);
}
} else {
if (AFI->getSVECalleeSavedStackSize()) {
// Deallocate the non-SVE locals first before we can deallocate (and
// restore callee saves) from the SVE area.
emitFrameOffset(
MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
false, false, nullptr, EmitCFI && !hasFP(MF),
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
NumBytes = 0;
}
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
false, nullptr, EmitCFI && !hasFP(MF),
SVEStackSize +
StackOffset::getFixed(NumBytes + PrologueSaveSize));
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
false, nullptr, EmitCFI && !hasFP(MF),
DeallocateAfter +
StackOffset::getFixed(NumBytes + PrologueSaveSize));
}
if (EmitCFI)
emitCalleeSavedSVERestores(MBB, RestoreEnd);
}
if (!hasFP(MF)) {
bool RedZone = canUseRedZone(MF);
// If this was a redzone leaf function, we don't need to restore the
// stack pointer (but we may need to pop stack args for fastcc).
if (RedZone && AfterCSRPopSize == 0)
return;
// Pop the local variables off the stack. If there are no callee-saved
// registers, it means we are actually positioned at the terminator and can
// combine stack increment for the locals and the stack increment for
// callee-popped arguments into (possibly) a single instruction and be done.
bool NoCalleeSaveRestore = PrologueSaveSize == 0;
int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
StackRestoreBytes += AfterCSRPopSize;
emitFrameOffset(
MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(StackRestoreBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
// If we were able to combine the local stack pop with the argument pop,
// then we're done.
if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
return;
}
NumBytes = 0;
}
// Restore the original stack pointer.
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
emitFrameOffset(
MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
// When we are about to restore the CSRs, the CFA register is SP again.
if (EmitCFI && hasFP(MF)) {
const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::cfiDefCfa(nullptr, Reg, PrologueSaveSize));
BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameDestroy);
}
// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
// code in the prologue.
if (AfterCSRPopSize) {
assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
"interrupt may have clobbered");
emitFrameOffset(
MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
}
}
bool AArch64FrameLowering::enableCFIFixup(MachineFunction &MF) const {
return TargetFrameLowering::enableCFIFixup(MF) &&
MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
StackOffset
AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const {
return resolveFrameIndexReference(
MF, FI, FrameReg,
/*PreferFP=*/
MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress) ||
MF.getFunction().hasFnAttribute(Attribute::SanitizeMemTag),
/*ForSimm=*/false);
}
StackOffset
AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
int FI) const {
// This function serves to provide a comparable offset from a single reference
// point (the value of SP at function entry) that can be used for analysis,
// e.g. the stack-frame-layout analysis pass. It is not guaranteed to be
// correct for all objects in the presence of VLA-area objects or dynamic
// stack re-alignment.
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
StackOffset SVEStackSize = getSVEStackSize(MF);
// For VLA-area objects, just emit an offset at the end of the stack frame.
  // Whilst not quite correct, these objects do live at the end of the frame,
  // so it is more useful for analysis if the offset reflects this.
if (MFI.isVariableSizedObjectIndex(FI)) {
return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
}
// This is correct in the absence of any SVE stack objects.
if (!SVEStackSize)
return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
ObjectOffset);
}
bool IsFixed = MFI.isFixedObjectIndex(FI);
bool IsCSR =
!IsFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
StackOffset ScalableOffset = {};
if (!IsFixed && !IsCSR)
ScalableOffset = -SVEStackSize;
return StackOffset::getFixed(ObjectOffset) + ScalableOffset;
}
StackOffset
AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
int FI) const {
return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
}
static StackOffset getFPOffset(const MachineFunction &MF,
int64_t ObjectOffset) {
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const Function &F = MF.getFunction();
bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
unsigned FixedObject =
getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
int64_t FPAdjust =
CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
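  // FP is established CalleeSaveBaseToFrameRecordOffset bytes above the base
  // of the callee-save area, i.e. FixedObject + FPAdjust bytes below the
  // incoming SP; adding both terms rebases the incoming-SP-relative
  // ObjectOffset to be FP-relative.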
return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
}
static StackOffset getStackOffset(const MachineFunction &MF,
int64_t ObjectOffset) {
const auto &MFI = MF.getFrameInfo();
return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
}
// TODO: This function currently does not work for scalable vectors.
int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
int FI) const {
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
? getFPOffset(MF, ObjectOffset).getFixed()
: getStackOffset(MF, ObjectOffset).getFixed();
}
StackOffset AArch64FrameLowering::resolveFrameIndexReference(
const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP,
bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector;
return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
PreferFP, ForSimm);
}
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
Register &FrameReg, bool PreferFP, bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
const StackOffset &SVEStackSize = getSVEStackSize(MF);
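  // Rough frame layout assumed below, highest address first: fixed objects
  // (incoming arguments), then the CSR area (which FP points into), then the
  // SVE area, then fixed-size locals, then any variable-sized objects at the
  // bottom (reached via SP, or BP when the SP offset is unknown).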
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
// reliable as a base). Make sure useFPForScavengingIndex() does the
// right thing for the emergency spill slot.
bool UseFP = false;
if (AFI->hasStackFrame() && !isSVE) {
// We shouldn't prefer using the FP to access fixed-sized stack objects when
// there are scalable (SVE) objects in between the FP and the fixed-sized
// objects.
PreferFP &= !SVEStackSize;
// Note: Keeping the following as multiple 'if' statements rather than
// merging to a single expression for readability.
//
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
} else if (isCSR && RegInfo->hasStackRealignment(MF)) {
// References to the CSR area must use FP if we're re-aligning the stack
// since the dynamically-sized alignment padding is between the SP/BP and
// the CSR area.
assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
UseFP = true;
} else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
// If the FPOffset is negative and we're producing a signed immediate, we
// have to keep in mind that the available offset range for negative
// offsets is smaller than for positive ones. If an offset is available
// via the FP and the SP, use whichever is closest.
bool FPOffsetFits = !ForSimm || FPOffset >= -256;
PreferFP |= Offset > -FPOffset && !SVEStackSize;
if (MFI.hasVarSizedObjects()) {
// If we have variable sized objects, we can use either FP or BP, as the
// SP offset is unknown. We can use the base pointer if we have one and
// FP is not preferred. If not, we're stuck with using FP.
bool CanUseBP = RegInfo->hasBasePointer(MF);
if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
UseFP = PreferFP;
else if (!CanUseBP) // Can't use BP. Forced to use FP.
UseFP = true;
// else we can use BP and FP, but the offset from FP won't fit.
// That will make us scavenge registers which we can probably avoid by
// using BP. If it won't fit for BP either, we'll scavenge anyway.
} else if (FPOffset >= 0) {
// Use SP or FP, whichever gives us the best chance of the offset
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
UseFP = true;
} else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
// Funclets access the locals contained in the parent's stack frame
// via the frame pointer, so we have to use the FP in the parent
// function.
(void) Subtarget;
assert(Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
MF.getFunction().isVarArg()) &&
"Funclets should only be present on Win64");
UseFP = true;
} else {
// We have the choice between FP and (SP or BP).
if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
UseFP = true;
}
}
}
assert(
((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
"In the presence of dynamic stack pointer realignment, "
"non-argument/CSR objects cannot be accessed through the frame pointer");
if (isSVE) {
    StackOffset FPOffset =
        StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(),
                         ObjectOffset);
StackOffset SPOffset =
SVEStackSize +
StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
ObjectOffset);
// Always use the FP for SVE spills if available and beneficial.
if (hasFP(MF) && (SPOffset.getFixed() ||
FPOffset.getScalable() < SPOffset.getScalable() ||
RegInfo->hasStackRealignment(MF))) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
}
FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
: (unsigned)AArch64::SP;
return SPOffset;
}
StackOffset ScalableOffset = {};
if (UseFP && !(isFixed || isCSR))
ScalableOffset = -SVEStackSize;
if (!UseFP && (isFixed || isCSR))
ScalableOffset = SVEStackSize;
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return StackOffset::getFixed(FPOffset) + ScalableOffset;
}
// Use the base pointer if we have one.
if (RegInfo->hasBasePointer(MF))
FrameReg = RegInfo->getBaseRegister();
else {
assert(!MFI.hasVarSizedObjects() &&
"Can't use SP when we have var sized objects.");
FrameReg = AArch64::SP;
// If we're using the red zone for this function, the SP won't actually
// be adjusted, so the offsets will be negative. They're also all
// within range of the signed 9-bit immediate instructions.
if (canUseRedZone(MF))
Offset -= AFI->getLocalStackSize();
}
return StackOffset::getFixed(Offset) + ScalableOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
// Do not set a kill flag on values that are also marked as live-in. This
  // happens with the @llvm.returnaddress intrinsic and with arguments passed in
// callee saved registers.
// Omitting the kill flags is conservatively correct even if the live-in
// is not used after all.
bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
return getKillRegState(!IsLiveIn);
}
static bool produceCompactUnwindFrame(MachineFunction &MF) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AttributeList Attrs = MF.getFunction().getAttributes();
return Subtarget.isTargetMachO() &&
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
MF.getFunction().getCallingConv() != CallingConv::SwiftTail;
}
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
bool NeedsWinCFI, bool IsFirst,
const TargetRegisterInfo *TRI) {
// If we are generating register pairs for a Windows function that requires
// EH support, then pair consecutive registers only. There are no unwind
  // opcodes for saves/restores of non-consecutive register pairs.
  // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x,
// save_lrpair.
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
if (Reg2 == AArch64::FP)
return true;
if (!NeedsWinCFI)
return false;
if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
return false;
// If pairing a GPR with LR, the pair can be described by the save_lrpair
// opcode. If this is the first register pair, it would end up with a
// predecrement, but there's no save_lrpair_x opcode, so we can only do this
  // if LR is paired with a register other than the first one.
// The save_lrpair opcode requires the first register to be an odd one.
if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
(Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
return false;
return true;
}
/// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
/// WindowsCFI requires that only consecutive registers can be paired.
/// LR and FP need to be allocated together when the frame needs to save
/// the frame-record. This means any other register pairing with LR is invalid.
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
bool UsesWinAAPCS, bool NeedsWinCFI,
bool NeedsFrameRecord, bool IsFirst,
const TargetRegisterInfo *TRI) {
if (UsesWinAAPCS)
return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst,
TRI);
// If we need to store the frame record, don't pair any register
// with LR other than FP.
if (NeedsFrameRecord)
return Reg2 == AArch64::LR;
return false;
}
namespace {
struct RegPairInfo {
unsigned Reg1 = AArch64::NoRegister;
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
RegPairInfo() = default;
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
unsigned getScale() const {
switch (Type) {
case PPR:
return 2;
case GPR:
case FPR64:
case VG:
return 8;
case ZPR:
case FPR128:
return 16;
}
llvm_unreachable("Unsupported type");
}
bool isScalable() const { return Type == PPR || Type == ZPR; }
};
} // end anonymous namespace
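// Note: RegPairInfo::Offset is measured in units of getScale(), matching the
// scaled immediate operand of the LDP/STP family (e.g. an Offset of 2 for a
// GPR pair addresses [sp, #16]).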
-unsigned findFreePredicateReg(BitVector &SavedRegs) {
- for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
- if (SavedRegs.test(PReg)) {
- unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
- return PNReg;
- }
- }
- return AArch64::NoRegister;
-}
-
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
bool NeedsFrameRecord) {
if (CSI.empty())
return;
bool IsWindows = isTargetWindows(MF);
bool NeedsWinCFI = needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
MachineFrameInfo &MFI = MF.getFrameInfo();
CallingConv::ID CC = MF.getFunction().getCallingConv();
unsigned Count = CSI.size();
(void)CC;
// MachO's compact unwind format relies on all registers being stored in
// pairs.
assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
CC == CallingConv::Win64 || (Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int ByteOffset = AFI->getCalleeSavedStackSize();
int StackFillDir = -1;
int RegInc = 1;
unsigned FirstReg = 0;
if (NeedsWinCFI) {
// For WinCFI, fill the stack from the bottom up.
ByteOffset = 0;
StackFillDir = 1;
// As the CSI array is reversed to match PrologEpilogInserter, iterate
// backwards, to pair up registers starting from lower numbered registers.
RegInc = -1;
FirstReg = Count - 1;
}
int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
Register LastReg = 0;
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
if (AArch64::GPR64RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::GPR;
else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::FPR64;
else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::FPR128;
else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::ZPR;
else if (AArch64::PPRRegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::PPR;
else if (RPI.Reg1 == AArch64::VG)
RPI.Type = RegPairInfo::VG;
else
llvm_unreachable("Unsupported register class.");
// Add the stack hazard size as we transition from GPR->FPR CSRs.
if (AFI->hasStackHazardSlotIndex() &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
ByteOffset += StackFillDir * StackHazardSize;
LastReg = RPI.Reg1;
// Add the next reg to the pair if it is in the same register class.
if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
Register NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
!invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
NeedsWinCFI, NeedsFrameRecord, IsFirst,
TRI))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR64:
if (AArch64::FPR64RegClass.contains(NextReg) &&
!invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
IsFirst, TRI))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR128:
if (AArch64::FPR128RegClass.contains(NextReg))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::PPR:
break;
case RegPairInfo::ZPR:
if (AFI->getPredicateRegForFillSpill() != 0)
if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::VG:
break;
}
}
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
// list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything else.
//
// The order of the registers in the list is controlled by
// getCalleeSavedRegs(), so they will always be in-order, as well.
assert((!RPI.isPaired() ||
(CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
"Out of order callee saved regs!");
assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
RPI.Reg1 == AArch64::LR) &&
"FrameRecord must be allocated together with LR");
// Windows AAPCS has FP and LR reversed.
assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
RPI.Reg2 == AArch64::LR) &&
"FrameRecord must be allocated together with LR");
// MachO's compact unwind format relies on all registers being stored in
// adjacent register pairs.
assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
CC == CallingConv::Win64 ||
(RPI.isPaired() &&
((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
RPI.Reg1 + 1 == RPI.Reg2))) &&
"Callee-save registers not saved as adjacent register pair!");
RPI.FrameIdx = CSI[i].getFrameIdx();
if (NeedsWinCFI &&
RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
int Scale = RPI.getScale();
int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(OffsetPre % Scale == 0);
if (RPI.isScalable())
ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
else
ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
// Swift's async context is directly before FP, so allocate an extra
// 8 bytes for it.
if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
((!IsWindows && RPI.Reg2 == AArch64::FP) ||
(IsWindows && RPI.Reg2 == AArch64::LR)))
ByteOffset += StackFillDir * 8;
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
ByteOffset % 16 != 0) {
ByteOffset += 8 * StackFillDir;
assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
// A stack frame with a gap looks like this, bottom up:
// d9, d8. x21, gap, x20, x19.
// Set extra alignment on the x21 object to create the gap above it.
MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
NeedGapToAlignStack = false;
}
int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(OffsetPost % Scale == 0);
// If filling top down (default), we want the offset after incrementing it.
// If filling bottom up (WinCFI) we need the original offset.
int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
// The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the
// Swift context can directly precede FP.
if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
((!IsWindows && RPI.Reg2 == AArch64::FP) ||
(IsWindows && RPI.Reg2 == AArch64::LR)))
Offset += 8;
RPI.Offset = Offset / Scale;
assert((!RPI.isPaired() ||
(!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
// Save the offset to frame record so that the FP register can point to the
// innermost frame record (spilled FP and LR registers).
if (NeedsFrameRecord &&
((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
(IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR)))
AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
RegPairs.push_back(RPI);
if (RPI.isPaired())
i += RegInc;
}
if (NeedsWinCFI) {
// If we need an alignment gap in the stack, align the topmost stack
// object. A stack frame with a gap looks like this, bottom up:
// x19, d8. d9, gap.
// Set extra alignment on the topmost stack object (the first element in
// CSI, which goes top down), to create the gap above it.
if (AFI->hasCalleeSaveStackFreeSpace())
MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
// We iterated bottom up over the registers; flip RegPairs back to top
// down order.
std::reverse(RegPairs.begin(), RegPairs.end());
}
}
bool AArch64FrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool NeedsWinCFI = needsWinCFI(MF);
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
MachineRegisterInfo &MRI = MF.getRegInfo();
  // Refresh the reserved regs in case anything has changed since the last
  // freeze.
MRI.freezeReservedRegs();
if (homogeneousPrologEpilog(MF)) {
auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog))
.setMIFlag(MachineInstr::FrameSetup);
for (auto &RPI : RegPairs) {
MIB.addReg(RPI.Reg1);
MIB.addReg(RPI.Reg2);
// Update register live in.
if (!MRI.isReserved(RPI.Reg1))
MBB.addLiveIn(RPI.Reg1);
if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))
MBB.addLiveIn(RPI.Reg2);
}
return true;
}
bool PTrueCreated = false;
for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {
unsigned Reg1 = RPI.Reg1;
unsigned Reg2 = RPI.Reg2;
unsigned StrOpc;
// Issue sequence of spills for cs regs. The first spill may be converted
// to a pre-decrement store later by emitPrologue if the callee-save stack
// area allocation can't be combined with the local stack area allocation.
// For example:
// stp x22, x21, [sp, #0] // addImm(+0)
// stp x20, x19, [sp, #16] // addImm(+2)
// stp fp, lr, [sp, #32] // addImm(+4)
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
unsigned Size;
Align Alignment;
switch (RPI.Type) {
case RegPairInfo::GPR:
StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
Size = 8;
Alignment = Align(8);
break;
case RegPairInfo::FPR64:
StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
Size = 8;
Alignment = Align(8);
break;
case RegPairInfo::FPR128:
StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
Size = 16;
Alignment = Align(16);
break;
case RegPairInfo::ZPR:
StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
Size = 16;
Alignment = Align(16);
break;
case RegPairInfo::PPR:
StrOpc = AArch64::STR_PXI;
Size = 2;
Alignment = Align(2);
break;
case RegPairInfo::VG:
StrOpc = AArch64::STRXui;
Size = 8;
Alignment = Align(8);
break;
}
unsigned X0Scratch = AArch64::NoRegister;
if (Reg1 == AArch64::VG) {
      // Find an available register in which to store the value of VG.
Reg1 = findScratchNonCalleeSaveRegister(&MBB);
assert(Reg1 != AArch64::NoRegister);
SMEAttrs Attrs(MF.getFunction());
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&
AFI->getStreamingVGIdx() == std::numeric_limits<int>::max()) {
// For locally-streaming functions, we need to store both the streaming
// & non-streaming VG. Spill the streaming value first.
BuildMI(MBB, MI, DL, TII.get(AArch64::RDSVLI_XI), Reg1)
.addImm(1)
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MI, DL, TII.get(AArch64::UBFMXri), Reg1)
.addReg(Reg1)
.addImm(3)
.addImm(63)
.setMIFlag(MachineInstr::FrameSetup);
AFI->setStreamingVGIdx(RPI.FrameIdx);
} else if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
BuildMI(MBB, MI, DL, TII.get(AArch64::CNTD_XPiI), Reg1)
.addImm(31)
.addImm(1)
.setMIFlag(MachineInstr::FrameSetup);
AFI->setVGIdx(RPI.FrameIdx);
} else {
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
if (llvm::any_of(
MBB.liveins(),
[&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
AArch64::X0, LiveIn.PhysReg);
}))
X0Scratch = Reg1;
if (X0Scratch != AArch64::NoRegister)
BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), Reg1)
.addReg(AArch64::XZR)
.addReg(AArch64::X0, RegState::Undef)
.addReg(AArch64::X0, RegState::Implicit)
.setMIFlag(MachineInstr::FrameSetup);
const uint32_t *RegMask = TRI->getCallPreservedMask(
MF,
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1);
BuildMI(MBB, MI, DL, TII.get(AArch64::BL))
.addExternalSymbol("__arm_get_current_vg")
.addRegMask(RegMask)
.addReg(AArch64::X0, RegState::ImplicitDefine)
.setMIFlag(MachineInstr::FrameSetup);
Reg1 = AArch64::X0;
AFI->setVGIdx(RPI.FrameIdx);
}
}
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
"Windows unwdinding requires a consecutive (FP,LR) pair");
// Windows unwind codes require consecutive registers if registers are
// paired. Make the switch here, so that the code below will save (x,x+1)
// and not (x+1,x).
unsigned FrameIdxReg1 = RPI.FrameIdx;
unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
if (NeedsWinCFI && RPI.isPaired()) {
std::swap(Reg1, Reg2);
std::swap(FrameIdxReg1, FrameIdxReg2);
}
if (RPI.isPaired() && RPI.isScalable()) {
[[maybe_unused]] const AArch64Subtarget &Subtarget =
MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned PnReg = AFI->getPredicateRegForFillSpill();
assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
"Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
auto IsPPR = [](const RegPairInfo &c) {
        return c.Type == RegPairInfo::PPR;
};
auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
auto IsZPR = [](const RegPairInfo &c) {
return c.Type == RegPairInfo::ZPR;
};
auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
assert(!(PPRBegin < ZPRBegin) &&
"Expected callee save predicate to be handled first");
#endif
if (!PTrueCreated) {
PTrueCreated = true;
BuildMI(MBB, MI, DL, TII.get(AArch64::PTRUE_C_B), PnReg)
.setMIFlags(MachineInstr::FrameSetup);
}
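      // With SVE2.1/SME2, the two consecutive ZPRs are spilled by one
      // predicated multi-vector store, roughly:
      //   st1b { z8.b, z9.b }, pn8, [sp, #offs, mul vl]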
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (!MRI.isReserved(Reg1))
MBB.addLiveIn(Reg1);
if (!MRI.isReserved(Reg2))
MBB.addLiveIn(Reg2);
MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
MachineMemOperand::MOStore, Size, Alignment));
MIB.addReg(PnReg);
MIB.addReg(AArch64::SP)
.addImm(RPI.Offset) // [sp, #offset*scale],
// where factor*scale is implicit
.setMIFlag(MachineInstr::FrameSetup);
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
MachineMemOperand::MOStore, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
    } else { // The case where this is not a paired ZPR spill.
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (!MRI.isReserved(Reg1))
MBB.addLiveIn(Reg1);
if (RPI.isPaired()) {
if (!MRI.isReserved(Reg2))
MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
MachineMemOperand::MOStore, Size, Alignment));
}
MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
.addImm(RPI.Offset) // [sp, #offset*scale],
// where factor*scale is implicit
.setMIFlag(MachineInstr::FrameSetup);
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
MachineMemOperand::MOStore, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
}
// Update the StackIDs of the SVE stack slots.
MachineFrameInfo &MFI = MF.getFrameInfo();
if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) {
MFI.setStackID(FrameIdxReg1, TargetStackID::ScalableVector);
if (RPI.isPaired())
MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector);
}
if (X0Scratch != AArch64::NoRegister)
BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0)
.addReg(AArch64::XZR)
.addReg(X0Scratch, RegState::Undef)
.addReg(X0Scratch, RegState::Implicit)
.setMIFlag(MachineInstr::FrameSetup);
}
return true;
}
bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
bool NeedsWinCFI = needsWinCFI(MF);
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
if (homogeneousPrologEpilog(MF, &MBB)) {
auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
.setMIFlag(MachineInstr::FrameDestroy);
for (auto &RPI : RegPairs) {
MIB.addReg(RPI.Reg1, RegState::Define);
MIB.addReg(RPI.Reg2, RegState::Define);
}
return true;
}
  // For performance reasons, restore the SVE registers in increasing order.
auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
std::reverse(PPRBegin, PPREnd);
auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
std::reverse(ZPRBegin, ZPREnd);
bool PTrueCreated = false;
for (const RegPairInfo &RPI : RegPairs) {
unsigned Reg1 = RPI.Reg1;
unsigned Reg2 = RPI.Reg2;
// Issue sequence of restores for cs regs. The last restore may be converted
// to a post-increment load later by emitEpilogue if the callee-save stack
// area allocation can't be combined with the local stack area allocation.
// For example:
// ldp fp, lr, [sp, #32] // addImm(+4)
// ldp x20, x19, [sp, #16] // addImm(+2)
// ldp x22, x21, [sp, #0] // addImm(+0)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
unsigned Size;
Align Alignment;
switch (RPI.Type) {
case RegPairInfo::GPR:
LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
Size = 8;
Alignment = Align(8);
break;
case RegPairInfo::FPR64:
LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
Size = 8;
Alignment = Align(8);
break;
case RegPairInfo::FPR128:
LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
Size = 16;
Alignment = Align(16);
break;
case RegPairInfo::ZPR:
LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
Size = 16;
Alignment = Align(16);
break;
case RegPairInfo::PPR:
LdrOpc = AArch64::LDR_PXI;
Size = 2;
Alignment = Align(2);
break;
case RegPairInfo::VG:
continue;
}
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
dbgs() << ") -> fi#(" << RPI.FrameIdx;
if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
dbgs() << ")\n");
// Windows unwind codes require consecutive registers if registers are
// paired. Make the switch here, so that the code below will save (x,x+1)
// and not (x+1,x).
unsigned FrameIdxReg1 = RPI.FrameIdx;
unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
if (NeedsWinCFI && RPI.isPaired()) {
std::swap(Reg1, Reg2);
std::swap(FrameIdxReg1, FrameIdxReg2);
}
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (RPI.isPaired() && RPI.isScalable()) {
[[maybe_unused]] const AArch64Subtarget &Subtarget =
MF.getSubtarget<AArch64Subtarget>();
unsigned PnReg = AFI->getPredicateRegForFillSpill();
assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
"Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
assert(!(PPRBegin < ZPRBegin) &&
"Expected callee save predicate to be handled first");
#endif
if (!PTrueCreated) {
PTrueCreated = true;
BuildMI(MBB, MBBI, DL, TII.get(AArch64::PTRUE_C_B), PnReg)
.setMIFlags(MachineInstr::FrameDestroy);
}
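      // Mirror of the spill path: reload the ZPR pair with one predicated
      // multi-vector load, roughly:
      //   ld1b { z8.b, z9.b }, pn8/z, [sp, #offs, mul vl]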
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
getDefRegState(true));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
MachineMemOperand::MOLoad, Size, Alignment));
MIB.addReg(PnReg);
MIB.addReg(AArch64::SP)
.addImm(RPI.Offset) // [sp, #offset*scale]
// where factor*scale is implicit
.setMIFlag(MachineInstr::FrameDestroy);
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
MachineMemOperand::MOLoad, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
} else {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
if (RPI.isPaired()) {
MIB.addReg(Reg2, getDefRegState(true));
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
MachineMemOperand::MOLoad, Size, Alignment));
}
MIB.addReg(Reg1, getDefRegState(true));
MIB.addReg(AArch64::SP)
.addImm(RPI.Offset) // [sp, #offset*scale]
// where factor*scale is implicit
.setMIFlag(MachineInstr::FrameDestroy);
MIB.addMemOperand(MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
MachineMemOperand::MOLoad, Size, Alignment));
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
}
}
return true;
}
// Return the FrameID for an MMO.
static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
const MachineFrameInfo &MFI) {
auto *PSV =
dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
if (PSV)
return std::optional<int>(PSV->getFrameIndex());
if (MMO->getValue()) {
if (auto *Al = dyn_cast<AllocaInst>(getUnderlyingObject(MMO->getValue()))) {
for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd();
FI++)
if (MFI.getObjectAllocation(FI) == Al)
return FI;
}
}
return std::nullopt;
}
// Return the FrameID for a Load/Store instruction by looking at the first MMO.
static std::optional<int> getLdStFrameID(const MachineInstr &MI,
const MachineFrameInfo &MFI) {
if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
return std::nullopt;
return getMMOFrameID(*MI.memoperands_begin(), MFI);
}
// Check if a Hazard slot is needed for the current function, and if so create
// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
// which can be used to determine if any hazard padding is needed.
void AArch64FrameLowering::determineStackHazardSlot(
MachineFunction &MF, BitVector &SavedRegs) const {
if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
return;
// Stack hazards are only needed in streaming functions.
SMEAttrs Attrs(MF.getFunction());
if (!StackHazardInNonStreaming && Attrs.hasNonStreamingInterfaceAndBody())
return;
MachineFrameInfo &MFI = MF.getFrameInfo();
  // Add a hazard slot if there are any CSR FPR registers, or there are any
  // FP-only stack objects.
bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
return AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR128RegClass.contains(Reg) ||
AArch64::ZPRRegClass.contains(Reg) ||
AArch64::PPRRegClass.contains(Reg);
});
bool HasFPRStackObjects = false;
if (!HasFPRCSRs) {
std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd());
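    // Record, per frame object, which kinds of access touch it: bit 0 for
    // GPR loads/stores and bit 1 for FP/NEON/SVE ones. Objects where only
    // bit 1 ends up set ((B & 3) == 2) are FPR-only.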
for (auto &MBB : MF) {
for (auto &MI : MBB) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
AArch64InstrInfo::isFpOrNEON(MI))
FrameObjects[*FI] |= 2;
else
FrameObjects[*FI] |= 1;
}
}
}
HasFPRStackObjects =
any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
}
if (HasFPRCSRs || HasFPRStackObjects) {
int ID = MFI.CreateStackObject(StackHazardSize, Align(16), false);
LLVM_DEBUG(dbgs() << "Created Hazard slot at " << ID << " size "
<< StackHazardSize << "\n");
MF.getInfo<AArch64FunctionInfo>()->setStackHazardSlotIndex(ID);
}
}
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
MachineFrameInfo &MFI = MF.getFrameInfo();
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
? RegInfo->getBaseRegister()
: (unsigned)AArch64::NoRegister;
unsigned ExtraCSSpill = 0;
bool HasUnpairedGPR64 = false;
- bool HasPairZReg = false;
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
// Add the base pointer register to SavedRegs if it is callee-save.
if (Reg == BasePointerReg)
SavedRegs.set(Reg);
bool RegUsed = SavedRegs.test(Reg);
unsigned PairedReg = AArch64::NoRegister;
const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR128RegClass.contains(Reg)) {
// Compensate for odd numbers of GP CSRs.
// For now, all the known cases of odd number of CSRs are of GPRs.
if (HasUnpairedGPR64)
PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
else
PairedReg = CSRegs[i ^ 1];
}
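    // CSRegs lists registers in their natural save pairs, so `i ^ 1` selects
    // the other element of the (even, odd) pair; once an unpaired GPR has
    // been seen, the `i % 2` form above shifts the pairing by one.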
    // If the function requires saving all the GP registers (SavedRegs), and
    // there is an odd number of GP CSRs at the same time (CSRegs), PairedReg
    // could be in a different register class from Reg, which would lead to an
    // FPR (usually D8) accidentally being marked saved.
if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
PairedReg = AArch64::NoRegister;
HasUnpairedGPR64 = true;
}
assert(PairedReg == AArch64::NoRegister ||
AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
AArch64::FPR128RegClass.contains(Reg, PairedReg));
if (!RegUsed) {
if (AArch64::GPR64RegClass.contains(Reg) &&
!RegInfo->isReservedReg(MF, Reg)) {
UnspilledCSGPR = Reg;
UnspilledCSGPRPaired = PairedReg;
}
continue;
}
// MachO's compact unwind format relies on all registers being stored in
// pairs.
// FIXME: the usual format is actually better if unwinding isn't needed.
if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
!SavedRegs.test(PairedReg)) {
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
!RegInfo->isReservedReg(MF, PairedReg))
ExtraCSSpill = PairedReg;
}
- // Check if there is a pair of ZRegs, so it can select PReg for spill/fill
- HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
- SavedRegs.test(CSRegs[i ^ 1]));
- }
-
- if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- // Find a suitable predicate register for the multi-vector spill/fill
- // instructions.
- unsigned PnReg = findFreePredicateReg(SavedRegs);
- if (PnReg != AArch64::NoRegister)
- AFI->setPredicateRegForFillSpill(PnReg);
- // If no free callee-save has been found assign one.
- if (!AFI->getPredicateRegForFillSpill() &&
- MF.getFunction().getCallingConv() ==
- CallingConv::AArch64_SVE_VectorCall) {
- SavedRegs.set(AArch64::P8);
- AFI->setPredicateRegForFillSpill(AArch64::PN8);
- }
-
- assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) &&
- "Predicate cannot be a reserved register");
}
if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
!Subtarget.isTargetWindows()) {
    // For Windows calling convention on a non-Windows OS, where X18 is treated
// as reserved, back up X18 when entering non-windows code (marked with the
// Windows calling convention) and restore when returning regardless of
// whether the individual function uses it - it might call other functions
// that clobber it.
SavedRegs.set(AArch64::X18);
}
  // Calculate the callee-saved stack size.
unsigned CSStackSize = 0;
unsigned SVECSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned Reg : SavedRegs.set_bits()) {
auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
if (AArch64::PPRRegClass.contains(Reg) ||
AArch64::ZPRRegClass.contains(Reg))
SVECSStackSize += RegSize;
else
CSStackSize += RegSize;
}
// Increase the callee-saved stack size if the function has streaming mode
// changes, as we will need to spill the value of the VG register.
// For locally streaming functions, we spill both the streaming and
// non-streaming VG value.
const Function &F = MF.getFunction();
SMEAttrs Attrs(F);
if (AFI->hasStreamingModeChanges()) {
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
CSStackSize += 16;
else
CSStackSize += 8;
}
// Determine if a Hazard slot should be used, and increase the CSStackSize by
// StackHazardSize if so.
determineStackHazardSlot(MF, SavedRegs);
if (AFI->hasStackHazardSlotIndex())
CSStackSize += StackHazardSize;
// Save number of saved regs, so we can easily update CSStackSize later.
unsigned NumSavedRegs = SavedRegs.count();
  // The frame record needs to be created by saving the appropriate registers.
uint64_t EstimatedStackSize = MFI.estimateStackSize(MF);
if (hasFP(MF) ||
windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
SavedRegs.set(AArch64::FP);
SavedRegs.set(AArch64::LR);
}
LLVM_DEBUG({
dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
for (unsigned Reg : SavedRegs.set_bits())
dbgs() << ' ' << printReg(Reg, RegInfo);
dbgs() << "\n";
});
// If any callee-saved registers are used, the frame cannot be eliminated.
int64_t SVEStackSize =
alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
// We may address some of the stack above the canonical frame address, either
// for our own arguments or during a call. Include that in calculating whether
// we have complicated addressing concerns.
int64_t CalleeStackUsed = 0;
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
int64_t FixedOff = MFI.getObjectOffset(I);
if (FixedOff > CalleeStackUsed)
CalleeStackUsed = FixedOff;
}
// Conservatively always assume BigStack when there are SVE spills.
bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
CalleeStackUsed) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
// Estimate if we might need to scavenge a register at some point in order
// to materialize a stack offset. If so, either spill one additional
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. If we already spilled an extra callee-saved register
// above to keep the number of spills even, we don't need to do anything else
// here.
if (BigStack) {
if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
<< " to get a scratch register.\n");
SavedRegs.set(UnspilledCSGPR);
ExtraCSSpill = UnspilledCSGPR;
// MachO's compact unwind format relies on all registers being stored in
// pairs, so if we need to spill one extra for BigStack, then we need to
// store the pair.
if (producePairRegisters(MF)) {
if (UnspilledCSGPRPaired == AArch64::NoRegister) {
// Failed to make a pair for compact unwind format, revert spilling.
if (produceCompactUnwindFrame(MF)) {
SavedRegs.reset(UnspilledCSGPR);
ExtraCSSpill = AArch64::NoRegister;
}
} else
SavedRegs.set(UnspilledCSGPRPaired);
}
}
// If we didn't find an extra callee-saved register to spill, create
// an emergency spill slot.
if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass &RC = AArch64::GPR64RegClass;
unsigned Size = TRI->getSpillSize(RC);
Align Alignment = TRI->getSpillAlign(RC);
int FI = MFI.CreateStackObject(Size, Alignment, false);
RS->addScavengingFrameIndex(FI);
LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
<< " as the emergency spill slot.\n");
}
}
  // Add the size of any additional 64-bit GPR saves.
CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
// A Swift asynchronous context extends the frame record with a pointer
// directly before FP.
if (hasFP(MF) && AFI->hasSwiftAsyncContext())
CSStackSize += 8;
uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16);
LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
<< EstimatedStackSize + AlignedCSStackSize << " bytes.\n");
assert((!MFI.isCalleeSavedInfoValid() ||
AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
"Should not invalidate callee saved info");
// Round up to register pair alignment to avoid additional SP adjustment
// instructions.
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
}
bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *RegInfo,
std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
unsigned &MaxCSFrameIndex) const {
bool NeedsWinCFI = needsWinCFI(MF);
// To match the canonical windows frame layout, reverse the list of
// callee saved registers to get them laid out by PrologEpilogInserter
// in the right order. (PrologEpilogInserter allocates stack objects top
// down. Windows canonical prologs store higher numbered registers at
// the top, thus have the CSI array start from the highest registers.)
if (NeedsWinCFI)
std::reverse(CSI.begin(), CSI.end());
if (CSI.empty())
return true; // Early exit if no callee saved registers are modified!
// Now that we know which registers need to be saved and restored, allocate
// stack slots for them.
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
bool UsesWinAAPCS = isTargetWindows(MF);
if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
int FrameIdx = MFI.CreateStackObject(8, Align(16), true);
AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
if ((unsigned)FrameIdx < MinCSFrameIndex)
MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex)
MaxCSFrameIndex = FrameIdx;
}
// Insert VG into the list of CSRs, immediately before LR if saved.
if (AFI->hasStreamingModeChanges()) {
std::vector<CalleeSavedInfo> VGSaves;
SMEAttrs Attrs(MF.getFunction());
auto VGInfo = CalleeSavedInfo(AArch64::VG);
VGInfo.setRestored(false);
VGSaves.push_back(VGInfo);
// Add VG again if the function is locally-streaming, as we will spill two
// values.
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
VGSaves.push_back(VGInfo);
bool InsertBeforeLR = false;
for (unsigned I = 0; I < CSI.size(); I++)
if (CSI[I].getReg() == AArch64::LR) {
InsertBeforeLR = true;
CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
break;
}
if (!InsertBeforeLR)
CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());
}
Register LastReg = 0;
int HazardSlotIndex = std::numeric_limits<int>::max();
for (auto &CS : CSI) {
Register Reg = CS.getReg();
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
// Create a hazard slot as we switch between GPR and FPR CSRs.
if (AFI->hasStackHazardSlotIndex() &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(Reg)) {
assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
"Unexpected register order for hazard slot");
HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
<< "\n");
AFI->setStackHazardCSRSlotIndex(HazardSlotIndex);
if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
MinCSFrameIndex = HazardSlotIndex;
if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
MaxCSFrameIndex = HazardSlotIndex;
}
unsigned Size = RegInfo->getSpillSize(*RC);
Align Alignment(RegInfo->getSpillAlign(*RC));
int FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
CS.setFrameIdx(FrameIdx);
if ((unsigned)FrameIdx < MinCSFrameIndex)
MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex)
MaxCSFrameIndex = FrameIdx;
// Grab 8 bytes below FP for the extended asynchronous frame info.
if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS &&
Reg == AArch64::FP) {
FrameIdx = MFI.CreateStackObject(8, Alignment, true);
AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
if ((unsigned)FrameIdx < MinCSFrameIndex)
MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex)
MaxCSFrameIndex = FrameIdx;
}
LastReg = Reg;
}
// Add hazard slot in the case where no FPR CSRs are present.
if (AFI->hasStackHazardSlotIndex() &&
HazardSlotIndex == std::numeric_limits<int>::max()) {
HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
<< "\n");
AFI->setStackHazardCSRSlotIndex(HazardSlotIndex);
if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
MinCSFrameIndex = HazardSlotIndex;
if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
MaxCSFrameIndex = HazardSlotIndex;
}
return true;
}
bool AArch64FrameLowering::enableStackSlotScavenging(
const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// If the function has streaming-mode changes, don't scavenge a
// spillslot in the callee-save area, as that might require an
// 'addvl' in the streaming-mode-changing call-sequence when the
// function doesn't use a FP.
if (AFI->hasStreamingModeChanges() && !hasFP(MF))
return false;
  // Don't allow stack slot scavenging with hazard slots, in case it moves
  // objects into the wrong place.
if (AFI->hasStackHazardSlotIndex())
return false;
return AFI->hasCalleeSaveStackFreeSpace();
}
/// Returns true if there are any SVE callee saves.
static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
int &Min, int &Max) {
Min = std::numeric_limits<int>::max();
Max = std::numeric_limits<int>::min();
if (!MFI.isCalleeSavedInfoValid())
return false;
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
for (auto &CS : CSI) {
if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
AArch64::PPRRegClass.contains(CS.getReg())) {
assert((Max == std::numeric_limits<int>::min() ||
Max + 1 == CS.getFrameIdx()) &&
"SVE CalleeSaves are not consecutive");
Min = std::min(Min, CS.getFrameIdx());
Max = std::max(Max, CS.getFrameIdx());
}
}
return Min != std::numeric_limits<int>::max();
}
// Process all the SVE stack objects and determine offsets for each
// object. If AssignOffsets is true, the offsets get assigned.
// Fills in the first and last callee-saved frame indices into
// Min/MaxCSFrameIndex, respectively.
// Returns the size of the stack.
static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
int &MinCSFrameIndex,
int &MaxCSFrameIndex,
bool AssignOffsets) {
#ifndef NDEBUG
// First process all fixed stack objects.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
"SVE vectors should never be passed on the stack by value, only by "
"reference.");
#endif
auto Assign = [&MFI](int FI, int64_t Offset) {
LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
MFI.setObjectOffset(FI, Offset);
};
int64_t Offset = 0;
// Then process all callee saved slots.
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
// Assign offsets to the callee save slots.
for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
Offset += MFI.getObjectSize(I);
Offset = alignTo(Offset, MFI.getObjectAlign(I));
if (AssignOffsets)
Assign(I, -Offset);
}
}
  // Ensure that the callee-save area is aligned to 16 bytes.
Offset = alignTo(Offset, Align(16U));
// Create a buffer of SVE objects to allocate and sort it.
SmallVector<int, 8> ObjectsToAllocate;
// If we have a stack protector, and we've previously decided that we have SVE
// objects on the stack and thus need it to go in the SVE stack area, then it
// needs to go first.
int StackProtectorFI = -1;
if (MFI.hasStackProtectorIndex()) {
StackProtectorFI = MFI.getStackProtectorIndex();
if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
ObjectsToAllocate.push_back(StackProtectorFI);
}
for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
unsigned StackID = MFI.getStackID(I);
if (StackID != TargetStackID::ScalableVector)
continue;
if (I == StackProtectorFI)
continue;
if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
continue;
if (MFI.isDeadObjectIndex(I))
continue;
ObjectsToAllocate.push_back(I);
}
// Allocate all SVE locals and spills
for (unsigned FI : ObjectsToAllocate) {
Align Alignment = MFI.getObjectAlign(FI);
// FIXME: Given that the length of SVE vectors is not necessarily a power of
// two, we'd need to align every object dynamically at runtime if the
// alignment is larger than 16. This is not yet supported.
if (Alignment > Align(16))
report_fatal_error(
"Alignment of scalable vectors > 16 bytes is not yet supported");
Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
if (AssignOffsets)
Assign(FI, -Offset);
}
return Offset;
}
int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
MachineFrameInfo &MFI) const {
int MinCSFrameIndex, MaxCSFrameIndex;
  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
                                        false);
}
int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
true);
}
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
int MinCSFrameIndex, MaxCSFrameIndex;
int64_t SVEStackSize =
assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
if (!MF.hasEHFunclets())
return;
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
MachineBasicBlock &MBB = MF.front();
auto MBBI = MBB.begin();
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
++MBBI;
// Create an UnwindHelp object.
  // The UnwindHelp object is allocated at the start of the fixed object area.
int64_t FixedObject =
getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false);
int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8,
/*SPOffset*/ -FixedObject,
/*IsImmutable=*/false);
EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
// We need to store -2 into the UnwindHelp object at the start of the
// function.
DebugLoc DL;
RS->enterBasicBlockEnd(MBB);
RS->backward(MBBI);
Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
assert(DstReg && "There must be a free register after frame setup");
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
.addReg(DstReg, getKillRegState(true))
.addFrameIndex(UnwindHelpFI)
.addImm(0);
}
namespace {
struct TagStoreInstr {
MachineInstr *MI;
int64_t Offset, Size;
explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
: MI(MI), Offset(Offset), Size(Size) {}
};
class TagStoreEdit {
MachineFunction *MF;
MachineBasicBlock *MBB;
MachineRegisterInfo *MRI;
// Tag store instructions that are being replaced.
SmallVector<TagStoreInstr, 8> TagStores;
// Combined memref arguments of the above instructions.
SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
// Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
// FrameRegOffset + Size) with the address tag of SP.
Register FrameReg;
StackOffset FrameRegOffset;
int64_t Size;
// If not std::nullopt, move FrameReg to (FrameReg + FrameRegUpdate) at the
// end.
std::optional<int64_t> FrameRegUpdate;
// MIFlags for any FrameReg updating instructions.
unsigned FrameRegUpdateFlags;
// Use zeroing instruction variants.
bool ZeroData;
DebugLoc DL;
void emitUnrolled(MachineBasicBlock::iterator InsertI);
void emitLoop(MachineBasicBlock::iterator InsertI);
public:
TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
: MBB(MBB), ZeroData(ZeroData) {
MF = MBB->getParent();
MRI = &MF->getRegInfo();
}
// Add an instruction to be replaced. Instructions must be added in the
// ascending order of Offset, and have to be adjacent.
void addInstruction(TagStoreInstr I) {
assert((TagStores.empty() ||
TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
"Non-adjacent tag store instructions.");
TagStores.push_back(I);
}
void clear() { TagStores.clear(); }
// Emit equivalent code at the given location, and erase the current set of
// instructions. May skip if the replacement is not profitable. May invalidate
// the input iterator and replace it with a valid one.
void emitCode(MachineBasicBlock::iterator &InsertI,
const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
};
void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
const AArch64InstrInfo *TII =
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
const int64_t kMinOffset = -256 * 16;
const int64_t kMaxOffset = 255 * 16;
Register BaseReg = FrameReg;
int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
if (BaseRegOffsetBytes < kMinOffset ||
BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
// BaseReg can be FP, which is not necessarily 16-byte aligned. In
// that case, BaseRegOffsetBytes will not be aligned to 16 bytes, which
// is required for the offset of ST2G.
BaseRegOffsetBytes % 16 != 0) {
Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
StackOffset::getFixed(BaseRegOffsetBytes), TII);
BaseReg = ScratchReg;
BaseRegOffsetBytes = 0;
}
MachineInstr *LastI = nullptr;
while (Size) {
int64_t InstrSize = (Size > 16) ? 32 : 16;
unsigned Opcode =
InstrSize == 16
? (ZeroData ? AArch64::STZGi : AArch64::STGi)
: (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi);
assert(BaseRegOffsetBytes % 16 == 0);
MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
.addReg(AArch64::SP)
.addReg(BaseReg)
.addImm(BaseRegOffsetBytes / 16)
.setMemRefs(CombinedMemRefs);
// A store to [BaseReg, #0] should go last for an opportunity to fold the
// final SP adjustment in the epilogue.
if (BaseRegOffsetBytes == 0)
LastI = I;
BaseRegOffsetBytes += InstrSize;
Size -= InstrSize;
}
if (LastI)
MBB->splice(InsertI, MBB, LastI);
}
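// Sketch of the unrolled expansion (illustrative): for Size = 48,
// BaseRegOffsetBytes = 0 and ZeroData = false, the loop emits
//   st2g sp, [base, #0]   ; tags [base, base+32)
//   stg  sp, [base, #32]  ; tags [base+32, base+48)
// and the splice below then moves the #0 store last, so that a following SP
// adjustment in the epilogue can be folded into it.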
void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
const AArch64InstrInfo *TII =
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
Register BaseReg = FrameRegUpdate
? FrameReg
: MRI->createVirtualRegister(&AArch64::GPR64RegClass);
Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);
int64_t LoopSize = Size;
// If the loop size is not a multiple of 32, split off one 16-byte store at
// the end to fold the BaseReg update into.
if (FrameRegUpdate && *FrameRegUpdate)
LoopSize -= LoopSize % 32;
MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
TII->get(ZeroData ? AArch64::STZGloop_wback
: AArch64::STGloop_wback))
.addDef(SizeReg)
.addDef(BaseReg)
.addImm(LoopSize)
.addReg(BaseReg)
.setMemRefs(CombinedMemRefs);
if (FrameRegUpdate)
LoopI->setFlags(FrameRegUpdateFlags);
int64_t ExtraBaseRegUpdate =
FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
if (LoopSize < Size) {
assert(FrameRegUpdate);
assert(Size - LoopSize == 16);
// Tag 16 more bytes at BaseReg and update BaseReg.
BuildMI(*MBB, InsertI, DL,
TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
.addDef(BaseReg)
.addReg(BaseReg)
.addReg(BaseReg)
.addImm(1 + ExtraBaseRegUpdate / 16)
.setMemRefs(CombinedMemRefs)
.setMIFlags(FrameRegUpdateFlags);
} else if (ExtraBaseRegUpdate) {
// Update BaseReg.
BuildMI(
*MBB, InsertI, DL,
TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
.addDef(BaseReg)
.addReg(BaseReg)
.addImm(std::abs(ExtraBaseRegUpdate))
.addImm(0)
.setMIFlags(FrameRegUpdateFlags);
}
}
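// Worked example (illustrative): for Size = 272 with a pending base-register
// update equal to FrameRegOffset + Size, LoopSize becomes 256, so the code
// emits an STGloop_wback tagging 256 bytes with write-back of BaseReg,
// followed by a single post-indexed STG tagging the last 16 bytes; with
// ExtraBaseRegUpdate = 0 its post-increment operand is 1, i.e. 16 bytes.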
// Check if *II is a register update that can be merged into STGloop that ends
// at (Reg + Size). On success, *TotalOffset is set to the required adjustment
// to Reg at the end of the loop.
bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
int64_t Size, int64_t *TotalOffset) {
MachineInstr &MI = *II;
if ((MI.getOpcode() == AArch64::ADDXri ||
MI.getOpcode() == AArch64::SUBXri) &&
MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
int64_t Offset = MI.getOperand(2).getImm() << Shift;
if (MI.getOpcode() == AArch64::SUBXri)
Offset = -Offset;
int64_t AbsPostOffset = std::abs(Offset - Size);
const int64_t kMaxOffset =
0xFFF; // Max encoding for unshifted ADDXri / SUBXri
if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
*TotalOffset = Offset;
return true;
}
}
return false;
}
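// Worked example (illustrative): if the loop ends at (Reg + 80) and *II is
//   ADDXri Reg, Reg, #96, 0
// then Offset = 96 and AbsPostOffset = |96 - 80| = 16, which is <= 0xFFF and
// a multiple of 16, so the update can be merged and *TotalOffset is set
// to 96.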
void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
SmallVectorImpl<MachineMemOperand *> &MemRefs) {
MemRefs.clear();
for (auto &TS : TSE) {
MachineInstr *MI = TS.MI;
// An instruction without memory operands may access anything. Be
// conservative and return an empty list.
if (MI->memoperands_empty()) {
MemRefs.clear();
return;
}
MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
}
}
void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
const AArch64FrameLowering *TFI,
bool TryMergeSPUpdate) {
if (TagStores.empty())
return;
TagStoreInstr &FirstTagStore = TagStores[0];
TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
DL = TagStores[0].MI->getDebugLoc();
Register Reg;
FrameRegOffset = TFI->resolveFrameOffsetReference(
*MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
/*PreferFP=*/false, /*ForSimm=*/true);
FrameReg = Reg;
FrameRegUpdate = std::nullopt;
mergeMemRefs(TagStores, CombinedMemRefs);
LLVM_DEBUG({
dbgs() << "Replacing adjacent STG instructions:\n";
for (const auto &Instr : TagStores) {
dbgs() << " " << *Instr.MI;
}
});
// Size threshold where a loop becomes shorter than a linear sequence of
// tagging instructions.
const int kSetTagLoopThreshold = 176;
if (Size < kSetTagLoopThreshold) {
if (TagStores.size() < 2)
return;
emitUnrolled(InsertI);
} else {
MachineInstr *UpdateInstr = nullptr;
int64_t TotalOffset = 0;
if (TryMergeSPUpdate) {
// See if we can merge base register update into the STGloop.
// This is done in AArch64LoadStoreOptimizer for "normal" stores,
// but STGloop is way too unusual for that, and also it only
// realistically happens in function epilogue. Also, STGloop is expanded
// before that pass.
if (InsertI != MBB->end() &&
canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
&TotalOffset)) {
UpdateInstr = &*InsertI++;
LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
<< *UpdateInstr);
}
}
if (!UpdateInstr && TagStores.size() < 2)
return;
if (UpdateInstr) {
FrameRegUpdate = TotalOffset;
FrameRegUpdateFlags = UpdateInstr->getFlags();
}
emitLoop(InsertI);
if (UpdateInstr)
UpdateInstr->eraseFromParent();
}
for (auto &TS : TagStores)
TS.MI->eraseFromParent();
}
bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
int64_t &Size, bool &ZeroData) {
MachineFunction &MF = *MI.getParent()->getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Opcode = MI.getOpcode();
ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
Opcode == AArch64::STZ2Gi);
if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
return false;
if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
return false;
Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
Size = MI.getOperand(2).getImm();
return true;
}
if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
Size = 16;
else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
Size = 32;
else
return false;
if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
return false;
Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
16 * MI.getOperand(2).getImm();
return true;
}
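// Example (illustrative): an STGi of the form
//   STGi $sp, $fi, 2
// with the frame object at offset -64 yields Offset = -64 + 16 * 2 = -32 and
// Size = 16, whereas STGloop/STZGloop take their size directly from the
// immediate operand.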
// Detect a run of memory tagging instructions for adjacent stack frame slots,
// and replace them with a shorter instruction sequence:
// * replace STG + STG with ST2G
// * replace STGloop + STGloop with STGloop
// This code needs to run when stack slot offsets are already known, but before
// FrameIndex operands in STG instructions are eliminated.
MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
const AArch64FrameLowering *TFI,
RegScavenger *RS) {
bool FirstZeroData;
int64_t Size, Offset;
MachineInstr &MI = *II;
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::iterator NextI = ++II;
if (&MI == &MBB->instr_back())
return II;
if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
return II;
SmallVector<TagStoreInstr, 4> Instrs;
Instrs.emplace_back(&MI, Offset, Size);
constexpr int kScanLimit = 10;
int Count = 0;
for (MachineBasicBlock::iterator E = MBB->end();
NextI != E && Count < kScanLimit; ++NextI) {
MachineInstr &MI = *NextI;
bool ZeroData;
int64_t Size, Offset;
// Collect instructions that update memory tags with a FrameIndex operand
// and (when applicable) constant size, and whose output registers are dead
// (the latter is almost always the case in practice). Since these
// instructions effectively have no inputs or outputs, we are free to skip
// any non-aliasing instructions in between without tracking used registers.
if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
if (ZeroData != FirstZeroData)
break;
Instrs.emplace_back(&MI, Offset, Size);
continue;
}
// Only count non-transient, non-tagging instructions toward the scan
// limit.
if (!MI.isTransient())
++Count;
// Just in case, stop before the epilogue code starts.
if (MI.getFlag(MachineInstr::FrameSetup) ||
MI.getFlag(MachineInstr::FrameDestroy))
break;
// Reject anything that may alias the collected instructions.
if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
break;
}
// New code will be inserted after the last tagging instruction we've found.
MachineBasicBlock::iterator InsertI = Instrs.back().MI;
// All the gathered stack tag instructions are merged and placed after the
// last tag store in the list. Before inserting, we must check whether the
// NZCV flag is live at the insertion point: if it is, bail out, because any
// STG loops produced by the merge would clobber it.
// FIXME: Bailing out like this is conservative: the liveness check is
// performed even when the merged sequence ends up containing no STG loops,
// in which case it is unnecessary.
LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
LiveRegs.addLiveOuts(*MBB);
for (auto I = MBB->rbegin();; ++I) {
MachineInstr &MI = *I;
if (MI == InsertI)
break;
LiveRegs.stepBackward(*I);
}
InsertI++;
if (LiveRegs.contains(AArch64::NZCV))
return InsertI;
llvm::stable_sort(Instrs,
[](const TagStoreInstr &Left, const TagStoreInstr &Right) {
return Left.Offset < Right.Offset;
});
// Make sure that we don't have any overlapping stores.
int64_t CurOffset = Instrs[0].Offset;
for (auto &Instr : Instrs) {
if (CurOffset > Instr.Offset)
return NextI;
CurOffset = Instr.Offset + Instr.Size;
}
// Find contiguous runs of tagged memory and emit shorter instruction
// sequences for them when possible.
TagStoreEdit TSE(MBB, FirstZeroData);
std::optional<int64_t> EndOffset;
for (auto &Instr : Instrs) {
if (EndOffset && *EndOffset != Instr.Offset) {
// Found a gap.
TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ false);
TSE.clear();
}
TSE.addInstruction(Instr);
EndOffset = Instr.Offset + Instr.Size;
}
const MachineFunction *MF = MBB->getParent();
// Multiple FP/SP updates in a loop cannot be described by CFI instructions.
TSE.emitCode(
InsertI, TFI, /*TryMergeSPUpdate = */
!MF->getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(*MF));
return InsertI;
}
} // namespace
MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
const AArch64FrameLowering *TFI) {
MachineInstr &MI = *II;
MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
if (MI.getOpcode() != AArch64::VGSavePseudo &&
MI.getOpcode() != AArch64::VGRestorePseudo)
return II;
SMEAttrs FuncAttrs(MF->getFunction());
bool LocallyStreaming =
FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface();
const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
const AArch64InstrInfo *TII =
MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
int64_t VGFrameIdx =
LocallyStreaming ? AFI->getStreamingVGIdx() : AFI->getVGIdx();
assert(VGFrameIdx != std::numeric_limits<int>::max() &&
"Expected FrameIdx for VG");
unsigned CFIIndex;
if (MI.getOpcode() == AArch64::VGSavePseudo) {
const MachineFrameInfo &MFI = MF->getFrameInfo();
int64_t Offset =
MFI.getObjectOffset(VGFrameIdx) - TFI->getOffsetOfLocalArea();
CFIIndex = MF->addFrameInst(MCCFIInstruction::createOffset(
nullptr, TRI->getDwarfRegNum(AArch64::VG, true), Offset));
} else
CFIIndex = MF->addFrameInst(MCCFIInstruction::createRestore(
nullptr, TRI->getDwarfRegNum(AArch64::VG, true)));
MachineInstr *UnwindInst = BuildMI(*MBB, II, II->getDebugLoc(),
TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
MI.eraseFromParent();
return UnwindInst->getIterator();
}
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS = nullptr) const {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
for (auto &BB : MF)
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
if (AFI->hasStreamingModeChanges())
II = emitVGSaveRestore(II, this);
if (StackTaggingMergeSetTag)
II = tryMergeAdjacentSTG(II, this, RS);
}
}
/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
/// before the update. This is easily retrieved as it is exactly the offset
/// that is set in processFunctionBeforeFrameFinalized.
StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
const MachineFunction &MF, int FI, Register &FrameReg,
bool IgnoreSPUpdates) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (IgnoreSPUpdates) {
LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
<< MFI.getObjectOffset(FI) << "\n");
FrameReg = AArch64::SP;
return StackOffset::getFixed(MFI.getObjectOffset(FI));
}
// Go to common code if we cannot provide sp + offset.
if (MFI.hasVarSizedObjects() ||
MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
return getFrameIndexReference(MF, FI, FrameReg);
FrameReg = AArch64::SP;
return getStackOffset(MF, MFI.getObjectOffset(FI));
}
/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
/// the parent's frame pointer
unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
const MachineFunction &MF) const {
return 0;
}
/// Funclets only need to account for space for the callee saved registers,
/// as the locals are accounted for in the parent's stack frame.
unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
const MachineFunction &MF) const {
// This is the size of the pushed CSRs.
unsigned CSSize =
MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
// This is the amount of stack a funclet needs to allocate.
return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
getStackAlign());
}
namespace {
struct FrameObject {
bool IsValid = false;
// Index of the object in MFI.
int ObjectIndex = 0;
// Group ID this object belongs to.
int GroupIndex = -1;
// This object should be placed first (closest to SP).
bool ObjectFirst = false;
// This object's group (which always contains the object with
// ObjectFirst==true) should be placed first.
bool GroupFirst = false;
// Used to distinguish between FP and GPR accesses. The values are decided so
// that they sort FPR < Hazard < GPR and they can be or'd together.
unsigned Accesses = 0;
enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 };
};
class GroupBuilder {
SmallVector<int, 8> CurrentMembers;
int NextGroupIndex = 0;
std::vector<FrameObject> &Objects;
public:
GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
void AddMember(int Index) { CurrentMembers.push_back(Index); }
void EndCurrentGroup() {
if (CurrentMembers.size() > 1) {
// Create a new group with the current member list. This might remove them
// from their pre-existing groups. That's OK, dealing with overlapping
// groups is too hard and unlikely to make a difference.
LLVM_DEBUG(dbgs() << "group:");
for (int Index : CurrentMembers) {
Objects[Index].GroupIndex = NextGroupIndex;
LLVM_DEBUG(dbgs() << " " << Index);
}
LLVM_DEBUG(dbgs() << "\n");
NextGroupIndex++;
}
CurrentMembers.clear();
}
};
bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
// Objects at a lower index are closer to FP; objects at a higher index are
// closer to SP.
//
// For consistency in our comparison, all invalid objects are placed
// at the end. This also allows us to stop walking when we hit the
// first invalid item after it's all sorted.
//
// If we want to include a stack hazard region, order FPR accesses < the
// hazard object < GPR accesses in order to create a separation between the
// two. For the Accesses field 1 = FPR, 2 = Hazard Object, 4 = GPR.
//
// Otherwise the "first" object goes first (closest to SP), followed by the
// members of the "first" group.
//
// The rest are sorted by the group index to keep the groups together.
// Higher numbered groups are more likely to be around longer (i.e. untagged
// in the function epilogue and not at some earlier point). Place them closer
// to SP.
//
// If all else equal, sort by the object index to keep the objects in the
// original order.
return std::make_tuple(!A.IsValid, A.Accesses, A.ObjectFirst, A.GroupFirst,
A.GroupIndex, A.ObjectIndex) <
std::make_tuple(!B.IsValid, B.Accesses, B.ObjectFirst, B.GroupFirst,
B.GroupIndex, B.ObjectIndex);
}
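// Illustrative ordering (not from the source): with a hazard slot present,
// three valid objects with Accesses = AccessFPR (1), AccessHazard (2) and
// AccessGPR (4) compare as FPR < Hazard < GPR, so FPR slots end up at lower
// indices (closer to FP) and GPR slots at higher indices (closer to SP),
// separated by the hazard object.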
} // namespace
void AArch64FrameLowering::orderFrameObjects(
const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
if (!OrderFrameObjects || ObjectsToAllocate.empty())
return;
const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
for (auto &Obj : ObjectsToAllocate) {
FrameObjects[Obj].IsValid = true;
FrameObjects[Obj].ObjectIndex = Obj;
}
// Identify FPR vs GPR slots for hazards, and stack slots that are tagged at
// the same time.
GroupBuilder GB(FrameObjects);
for (auto &MBB : MF) {
for (auto &MI : MBB) {
if (MI.isDebugInstr())
continue;
if (AFI.hasStackHazardSlotIndex()) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
AArch64InstrInfo::isFpOrNEON(MI))
FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
else
FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
}
}
int OpIndex;
switch (MI.getOpcode()) {
case AArch64::STGloop:
case AArch64::STZGloop:
OpIndex = 3;
break;
case AArch64::STGi:
case AArch64::STZGi:
case AArch64::ST2Gi:
case AArch64::STZ2Gi:
OpIndex = 1;
break;
default:
OpIndex = -1;
}
int TaggedFI = -1;
if (OpIndex >= 0) {
const MachineOperand &MO = MI.getOperand(OpIndex);
if (MO.isFI()) {
int FI = MO.getIndex();
if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
FrameObjects[FI].IsValid)
TaggedFI = FI;
}
}
// If this is a stack tagging instruction for a slot that is not part of a
// group yet, either start a new group or add it to the current one.
if (TaggedFI >= 0)
GB.AddMember(TaggedFI);
else
GB.EndCurrentGroup();
}
// Groups should never span multiple basic blocks.
GB.EndCurrentGroup();
}
if (AFI.hasStackHazardSlotIndex()) {
FrameObjects[AFI.getStackHazardSlotIndex()].Accesses =
FrameObject::AccessHazard;
// If a stack object is unknown or both GPR and FPR, sort it into GPR.
for (auto &Obj : FrameObjects)
if (!Obj.Accesses ||
Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
Obj.Accesses = FrameObject::AccessGPR;
}
// If the function's tagged base pointer is pinned to a stack slot, we want to
// put that slot first when possible. This will likely place it at SP + 0,
// and save one instruction when generating the base pointer because IRG does
// not allow an immediate offset.
std::optional<int> TBPI = AFI.getTaggedBasePointerIndex();
if (TBPI) {
FrameObjects[*TBPI].ObjectFirst = true;
FrameObjects[*TBPI].GroupFirst = true;
int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
if (FirstGroupIndex >= 0)
for (FrameObject &Object : FrameObjects)
if (Object.GroupIndex == FirstGroupIndex)
Object.GroupFirst = true;
}
llvm::stable_sort(FrameObjects, FrameObjectCompare);
int i = 0;
for (auto &Obj : FrameObjects) {
// All invalid items are sorted at the end, so it's safe to stop.
if (!Obj.IsValid)
break;
ObjectsToAllocate[i++] = Obj.ObjectIndex;
}
LLVM_DEBUG({
dbgs() << "Final frame order:\n";
for (auto &Obj : FrameObjects) {
if (!Obj.IsValid)
break;
dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
if (Obj.ObjectFirst)
dbgs() << ", first";
if (Obj.GroupFirst)
dbgs() << ", group-first";
dbgs() << "\n";
}
});
}
/// Emit a loop to decrement SP until it is equal to TargetReg, with probes at
/// least every ProbeSize bytes. Returns an iterator of the first instruction
/// after the loop. The difference between SP and TargetReg must be an exact
/// multiple of ProbeSize.
MachineBasicBlock::iterator
AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
Register TargetReg) const {
MachineBasicBlock &MBB = *MBBI->getParent();
MachineFunction &MF = *MBB.getParent();
const AArch64InstrInfo *TII =
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
MF.insert(MBBInsertPoint, LoopMBB);
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
MF.insert(MBBInsertPoint, ExitMBB);
// SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
// in SUB).
emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-ProbeSize), TII,
MachineInstr::FrameSetup);
// STR XZR, [SP]
BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
.addReg(AArch64::XZR)
.addReg(AArch64::SP)
.addImm(0)
.setMIFlags(MachineInstr::FrameSetup);
// CMP SP, TargetReg
BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
AArch64::XZR)
.addReg(AArch64::SP)
.addReg(TargetReg)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
.setMIFlags(MachineInstr::FrameSetup);
// B.CC Loop
BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::NE)
.addMBB(LoopMBB)
.setMIFlags(MachineInstr::FrameSetup);
LoopMBB->addSuccessor(ExitMBB);
LoopMBB->addSuccessor(LoopMBB);
// Synthesize the exit MBB.
ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end());
ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
MBB.addSuccessor(LoopMBB);
// Update liveins.
fullyRecomputeLiveIns({ExitMBB, LoopMBB});
return ExitMBB->begin();
}
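// The emitted loop looks roughly like this (illustrative):
// LoopMBB:
//   sub  sp, sp, #ProbeSize
//   str  xzr, [sp]
//   subs xzr, sp, TargetReg, uxtx   ; cmp sp, TargetReg
//   b.ne LoopMBB
// ExitMBB:
//   ...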
void AArch64FrameLowering::inlineStackProbeFixed(
MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
StackOffset CFAOffset) const {
MachineBasicBlock *MBB = MBBI->getParent();
MachineFunction &MF = *MBB->getParent();
const AArch64InstrInfo *TII =
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
bool HasFP = hasFP(MF);
DebugLoc DL;
int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
int64_t NumBlocks = FrameSize / ProbeSize;
int64_t ResidualSize = FrameSize % ProbeSize;
LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
<< NumBlocks << " blocks of " << ProbeSize
<< " bytes, plus " << ResidualSize << " bytes\n");
// Decrement SP by NumBlocks * ProbeSize bytes, with either an unrolled
// sequence or an ordinary loop.
if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) {
for (int i = 0; i < NumBlocks; ++i) {
// SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not
// encodable in a SUB).
emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-ProbeSize), TII,
MachineInstr::FrameSetup, false, false, nullptr,
EmitAsyncCFI && !HasFP, CFAOffset);
CFAOffset += StackOffset::getFixed(ProbeSize);
// STR XZR, [SP]
BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
.addReg(AArch64::XZR)
.addReg(AArch64::SP)
.addImm(0)
.setMIFlags(MachineInstr::FrameSetup);
}
} else if (NumBlocks != 0) {
// SUB ScratchReg, SP, #FrameSize (or equivalent if FrameSize is not
// encodable in a SUB). ScratchReg may temporarily become the CFA register.
emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP,
StackOffset::getFixed(-ProbeSize * NumBlocks), TII,
MachineInstr::FrameSetup, false, false, nullptr,
EmitAsyncCFI && !HasFP, CFAOffset);
CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks);
MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
MBB = MBBI->getParent();
if (EmitAsyncCFI && !HasFP) {
// Set the CFA register back to SP.
const AArch64RegisterInfo &RegInfo =
*MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
}
if (ResidualSize != 0) {
// SUB SP, SP, #ResidualSize (or equivalent if ResidualSize is not encodable
// in SUB).
emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-ResidualSize), TII,
MachineInstr::FrameSetup, false, false, nullptr,
EmitAsyncCFI && !HasFP, CFAOffset);
if (ResidualSize > AArch64::StackProbeMaxUnprobedStack) {
// STR XZR, [SP]
BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
.addReg(AArch64::XZR)
.addReg(AArch64::SP)
.addImm(0)
.setMIFlags(MachineInstr::FrameSetup);
}
}
}
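// Worked example (illustrative, assuming a 4096-byte probe size): for
// FrameSize = 9000, NumBlocks = 2 and ResidualSize = 808. If 2 is within the
// unroll limit, two sub-sp/str-xzr pairs are emitted, followed by a final
// 808-byte SP decrement; a trailing probe is added only if the residual
// exceeds StackProbeMaxUnprobedStack.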
void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &MBB) const {
// Get the instructions that need to be replaced. We emit at most two of
// these. Remember them in order to avoid complications coming from the need
// to traverse the block while potentially creating more blocks.
SmallVector<MachineInstr *, 4> ToReplace;
for (MachineInstr &MI : MBB)
if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
ToReplace.push_back(&MI);
for (MachineInstr *MI : ToReplace) {
if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
Register ScratchReg = MI->getOperand(0).getReg();
int64_t FrameSize = MI->getOperand(1).getImm();
StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
MI->getOperand(3).getImm());
inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
CFAOffset);
} else {
assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
"Stack probe pseudo-instruction expected");
const AArch64InstrInfo *TII =
MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
Register TargetReg = MI->getOperand(0).getReg();
(void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
}
MI->eraseFromParent();
}
}
struct StackAccess {
enum AccessType {
NotAccessed = 0, // Stack object not accessed by load/store instructions.
GPR = 1 << 0, // A general purpose register.
PPR = 1 << 1, // A predicate register.
FPR = 1 << 2, // A floating point/Neon/SVE register.
};
int Idx;
StackOffset Offset;
int64_t Size;
unsigned AccessTypes;
StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}
bool operator<(const StackAccess &Rhs) const {
return std::make_tuple(start(), Idx) <
std::make_tuple(Rhs.start(), Rhs.Idx);
}
bool isCPU() const {
// Predicate register load and store instructions execute on the CPU.
return AccessTypes & (AccessType::GPR | AccessType::PPR);
}
bool isSME() const { return AccessTypes & AccessType::FPR; }
bool isMixed() const { return isCPU() && isSME(); }
int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
int64_t end() const { return start() + Size; }
std::string getTypeString() const {
switch (AccessTypes) {
case AccessType::FPR:
return "FPR";
case AccessType::PPR:
return "PPR";
case AccessType::GPR:
return "GPR";
case AccessType::NotAccessed:
return "NA";
default:
return "Mixed";
}
}
void print(raw_ostream &OS) const {
OS << getTypeString() << " stack object at [SP"
<< (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
if (Offset.getScalable())
OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
<< " * vscale";
OS << "]";
}
};
static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
SA.print(OS);
return OS;
}
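// Example output of print() (illustrative): an FPR object with a fixed
// offset of -16 and a scalable offset of -2 prints as
//   FPR stack object at [SP-16-2 * vscale]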
void AArch64FrameLowering::emitRemarks(
const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
SMEAttrs Attrs(MF.getFunction());
if (Attrs.hasNonStreamingInterfaceAndBody())
return;
const uint64_t HazardSize =
(StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
if (HazardSize == 0)
return;
const MachineFrameInfo &MFI = MF.getFrameInfo();
// Bail if function has no stack objects.
if (!MFI.hasStackObjects())
return;
std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
size_t NumFPLdSt = 0;
size_t NumNonFPLdSt = 0;
// Collect stack accesses via Load/Store instructions.
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
continue;
for (MachineMemOperand *MMO : MI.memoperands()) {
std::optional<int> FI = getMMOFrameID(MMO, MFI);
if (FI && !MFI.isDeadObjectIndex(*FI)) {
int FrameIdx = *FI;
size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
StackAccesses[ArrIdx].Idx = FrameIdx;
StackAccesses[ArrIdx].Offset =
getFrameIndexReferenceFromSP(MF, FrameIdx);
StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
}
unsigned RegTy = StackAccess::AccessType::GPR;
if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
RegTy = StackAccess::PPR;
else
RegTy = StackAccess::FPR;
} else if (AArch64InstrInfo::isFpOrNEON(MI)) {
RegTy = StackAccess::FPR;
}
StackAccesses[ArrIdx].AccessTypes |= RegTy;
if (RegTy == StackAccess::FPR)
++NumFPLdSt;
else
++NumNonFPLdSt;
}
}
}
}
if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
return;
llvm::sort(StackAccesses);
StackAccesses.erase(llvm::remove_if(StackAccesses,
[](const StackAccess &S) {
return S.AccessTypes ==
StackAccess::NotAccessed;
}),
StackAccesses.end());
SmallVector<const StackAccess *> MixedObjects;
SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
if (StackAccesses.front().isMixed())
MixedObjects.push_back(&StackAccesses.front());
for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
It != End; ++It) {
const auto &First = *It;
const auto &Second = *(It + 1);
if (Second.isMixed())
MixedObjects.push_back(&Second);
if ((First.isSME() && Second.isCPU()) ||
(First.isCPU() && Second.isSME())) {
uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
if (Distance < HazardSize)
HazardPairs.emplace_back(&First, &Second);
}
}
auto EmitRemark = [&](llvm::StringRef Str) {
ORE->emit([&]() {
auto R = MachineOptimizationRemarkAnalysis(
"sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
});
};
for (const auto &P : HazardPairs)
EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
for (const auto *Obj : MixedObjects)
EmitRemark(
formatv("{0} accessed by both GP and FP instructions", *Obj).str());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 377bcd5868fb..805684ef69a5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1,10039 +1,10035 @@
//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
#include "AArch64ExpandImm.h"
#include "AArch64FrameLowering.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PointerAuth.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
static cl::opt<unsigned> TBZDisplacementBits(
"aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
static cl::opt<unsigned> CBZDisplacementBits(
"aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
static cl::opt<unsigned>
BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
static cl::opt<unsigned>
BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
cl::desc("Restrict range of B instructions (DEBUG)"));
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
AArch64::CATCHRET),
RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
const Function &F = MF->getFunction();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
{
auto Op = MI.getOpcode();
if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
}
// Meta-instructions emit no code.
if (MI.isMetaInstruction())
return 0;
// FIXME: We currently only handle pseudoinstructions that don't get expanded
// before the assembly printer.
unsigned NumBytes = 0;
const MCInstrDesc &Desc = MI.getDesc();
// Size should preferably be set in
// llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
// Specific cases handle instructions of variable sizes.
switch (Desc.getOpcode()) {
default:
if (Desc.getSize())
return Desc.getSize();
// Anything not explicitly designated otherwise (i.e. pseudo-instructions
// with fixed constant size but not specified in .td file) is a normal
// 4-byte insn.
NumBytes = 4;
break;
case TargetOpcode::STACKMAP:
// The upper bound for a stackmap intrinsic is the full length of its shadow
NumBytes = StackMapOpers(&MI).getNumPatchBytes();
assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
break;
case TargetOpcode::PATCHPOINT:
// The size of the patchpoint intrinsic is the number of bytes requested
NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
break;
case TargetOpcode::STATEPOINT:
NumBytes = StatepointOpers(&MI).getNumPatchBytes();
assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
// No patch bytes means a normal call inst is emitted
if (NumBytes == 0)
NumBytes = 4;
break;
case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
// If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
// instructions are expanded to the specified number of NOPs. Otherwise,
// they are expanded to 36-byte XRay sleds.
NumBytes =
F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
break;
case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
// An XRay sled can be 4 bytes of alignment plus a 32-byte block.
NumBytes = 36;
break;
case TargetOpcode::PATCHABLE_EVENT_CALL:
// EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
NumBytes = 24;
break;
case AArch64::SPACE:
NumBytes = MI.getOperand(1).getImm();
break;
case TargetOpcode::BUNDLE:
NumBytes = getInstBundleLength(MI);
break;
}
return NumBytes;
}
unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
unsigned Size = 0;
MachineBasicBlock::const_instr_iterator I = MI.getIterator();
MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
Size += getInstSizeInBytes(*I);
}
return Size;
}
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
SmallVectorImpl<MachineOperand> &Cond) {
// Block ends with fall-through condbranch.
switch (LastInst->getOpcode()) {
default:
llvm_unreachable("Unknown branch instruction?");
case AArch64::Bcc:
Target = LastInst->getOperand(1).getMBB();
Cond.push_back(LastInst->getOperand(0));
break;
case AArch64::CBZW:
case AArch64::CBZX:
case AArch64::CBNZW:
case AArch64::CBNZX:
Target = LastInst->getOperand(1).getMBB();
Cond.push_back(MachineOperand::CreateImm(-1));
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
break;
case AArch64::TBZW:
case AArch64::TBZX:
case AArch64::TBNZW:
case AArch64::TBNZX:
Target = LastInst->getOperand(2).getMBB();
Cond.push_back(MachineOperand::CreateImm(-1));
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
Cond.push_back(LastInst->getOperand(1));
}
}
static unsigned getBranchDisplacementBits(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("unexpected opcode!");
case AArch64::B:
return BDisplacementBits;
case AArch64::TBNZW:
case AArch64::TBZW:
case AArch64::TBNZX:
case AArch64::TBZX:
return TBZDisplacementBits;
case AArch64::CBNZW:
case AArch64::CBZW:
case AArch64::CBNZX:
case AArch64::CBZX:
return CBZDisplacementBits;
case AArch64::Bcc:
return BCCDisplacementBits;
}
}
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
int64_t BrOffset) const {
unsigned Bits = getBranchDisplacementBits(BranchOp);
assert(Bits >= 3 && "max branch displacement must be enough to jump "
"over conditional branch expansion");
return isIntN(Bits, BrOffset / 4);
}
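// Worked example (illustrative): for Bcc, Bits is 19, so the branch is in
// range iff BrOffset / 4 fits in a signed 19-bit value, i.e. BrOffset lies
// within roughly +/-1 MiB ([-2^20, 2^20 - 4] bytes).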
MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
llvm_unreachable("unexpected opcode!");
case AArch64::B:
return MI.getOperand(0).getMBB();
case AArch64::TBZW:
case AArch64::TBNZW:
case AArch64::TBZX:
case AArch64::TBNZX:
return MI.getOperand(2).getMBB();
case AArch64::CBZW:
case AArch64::CBNZW:
case AArch64::CBZX:
case AArch64::CBNZX:
case AArch64::Bcc:
return MI.getOperand(1).getMBB();
}
}
void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
MachineBasicBlock &NewDestBB,
MachineBasicBlock &RestoreBB,
const DebugLoc &DL,
int64_t BrOffset,
RegScavenger *RS) const {
assert(RS && "RegScavenger required for long branching");
assert(MBB.empty() &&
"new block should be inserted for expanding unconditional branch");
assert(MBB.pred_size() == 1);
assert(RestoreBB.empty() &&
"restore block should be inserted for restoring clobbered registers");
auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
// Offsets outside of the signed 33-bit range are not supported for ADRP +
// ADD.
if (!isInt<33>(BrOffset))
report_fatal_error(
"Branch offsets outside of the signed 33-bit range not supported");
BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
.addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
.addReg(Reg)
.addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
.addImm(0);
BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
};
RS->enterBasicBlockEnd(MBB);
// If X16 is unused, we can rely on the linker to insert a range extension
// thunk if NewDestBB is out of range of a single B instruction.
constexpr Register Reg = AArch64::X16;
if (!RS->isRegUsed(Reg)) {
insertUnconditionalBranch(MBB, &NewDestBB, DL);
RS->setRegUsed(Reg);
return;
}
// If there's a free register and it's worth inflating the code size,
// manually insert the indirect branch.
Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
if (Scavenged != AArch64::NoRegister &&
MBB.getSectionID() == MBBSectionID::ColdSectionID) {
buildIndirectBranch(Scavenged, NewDestBB);
RS->setRegUsed(Scavenged);
return;
}
// Note: Spilling X16 briefly moves the stack pointer, making it incompatible
// with red zones.
AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
if (!AFI || AFI->hasRedZone().value_or(true))
report_fatal_error(
"Unable to insert indirect branch inside function that has red zone");
// Otherwise, spill X16 and defer range extension to the linker.
BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(Reg)
.addReg(AArch64::SP)
.addImm(-16);
BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
.addReg(AArch64::SP, RegState::Define)
.addReg(Reg, RegState::Define)
.addReg(AArch64::SP)
.addImm(16);
}
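// The spill path above emits, roughly (illustrative):
//   str x16, [sp, #-16]!   ; STRXpre
//   b   RestoreBB
// RestoreBB:
//   ldr x16, [sp], #16     ; LDRXpost
// leaving X16 free for the linker's range-extension thunk between the two.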
// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return false;
// Skip over SpeculationBarrierEndBB terminators
if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
--I;
}
if (!isUnpredicatedTerminator(*I))
return false;
// Get the last instruction in the block.
MachineInstr *LastInst = &*I;
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
if (isUncondBranchOpcode(LastOpc)) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
if (isCondBranchOpcode(LastOpc)) {
// Block ends with fall-through condbranch.
parseCondBranch(LastInst, TBB, Cond);
return false;
}
return true; // Can't handle indirect branch.
}
// Get the instruction before it if it is a terminator.
MachineInstr *SecondLastInst = &*I;
unsigned SecondLastOpc = SecondLastInst->getOpcode();
// If AllowModify is true and the block ends with two or more unconditional
// branches, delete all but the first unconditional branch.
if (AllowModify && isUncondBranchOpcode(LastOpc)) {
while (isUncondBranchOpcode(SecondLastOpc)) {
LastInst->eraseFromParent();
LastInst = SecondLastInst;
LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
// Return now; the only terminator is an unconditional branch.
TBB = LastInst->getOperand(0).getMBB();
return false;
}
SecondLastInst = &*I;
SecondLastOpc = SecondLastInst->getOpcode();
}
}
// If we're allowed to modify and the block ends in an unconditional branch
// which could simply fall through, remove the branch. (Note: This case only
// matters when we can't understand the whole sequence, otherwise it's also
// handled by BranchFolding.cpp.)
if (AllowModify && isUncondBranchOpcode(LastOpc) &&
MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
LastInst->eraseFromParent();
LastInst = SecondLastInst;
LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
assert(!isUncondBranchOpcode(LastOpc) &&
"unreachable unconditional branches removed above");
if (isCondBranchOpcode(LastOpc)) {
// Block ends with fall-through condbranch.
parseCondBranch(LastInst, TBB, Cond);
return false;
}
return true; // Can't handle indirect branch.
}
SecondLastInst = &*I;
SecondLastOpc = SecondLastInst->getOpcode();
}
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
return true;
// If the block ends with a B and a Bcc, handle it.
if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
parseCondBranch(SecondLastInst, TBB, Cond);
FBB = LastInst->getOperand(0).getMBB();
return false;
}
// If the block ends with two unconditional branches, handle it. The second
// one is not executed, so remove it.
if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
TBB = SecondLastInst->getOperand(0).getMBB();
I = LastInst;
if (AllowModify)
I->eraseFromParent();
return false;
}
// ...likewise if it ends with an indirect branch followed by an unconditional
// branch.
if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
I = LastInst;
if (AllowModify)
I->eraseFromParent();
return true;
}
// Otherwise, can't handle this.
return true;
}
bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
MachineBranchPredicate &MBP,
bool AllowModify) const {
// For the moment, handle only a block which ends with a cb(n)zx followed by
// a fallthrough. Why this? Because it is a common form.
// TODO: Should we handle b.cc?
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return true;
// Skip over SpeculationBarrierEndBB terminators
if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
--I;
}
if (!isUnpredicatedTerminator(*I))
return true;
// Get the last instruction in the block.
MachineInstr *LastInst = &*I;
unsigned LastOpc = LastInst->getOpcode();
if (!isCondBranchOpcode(LastOpc))
return true;
switch (LastOpc) {
default:
return true;
case AArch64::CBZW:
case AArch64::CBZX:
case AArch64::CBNZW:
case AArch64::CBNZX:
break;
};
MBP.TrueDest = LastInst->getOperand(1).getMBB();
assert(MBP.TrueDest && "expected!");
MBP.FalseDest = MBB.getNextNode();
MBP.ConditionDef = nullptr;
MBP.SingleUseCondition = false;
MBP.LHS = LastInst->getOperand(0);
MBP.RHS = MachineOperand::CreateImm(0);
MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
: MachineBranchPredicate::PRED_EQ;
return false;
}
bool AArch64InstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond[0].getImm() != -1) {
// Regular Bcc
AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
} else {
// Folded compare-and-branch
switch (Cond[1].getImm()) {
default:
llvm_unreachable("Unknown conditional branch!");
case AArch64::CBZW:
Cond[1].setImm(AArch64::CBNZW);
break;
case AArch64::CBNZW:
Cond[1].setImm(AArch64::CBZW);
break;
case AArch64::CBZX:
Cond[1].setImm(AArch64::CBNZX);
break;
case AArch64::CBNZX:
Cond[1].setImm(AArch64::CBZX);
break;
case AArch64::TBZW:
Cond[1].setImm(AArch64::TBNZW);
break;
case AArch64::TBNZW:
Cond[1].setImm(AArch64::TBZW);
break;
case AArch64::TBZX:
Cond[1].setImm(AArch64::TBNZX);
break;
case AArch64::TBNZX:
Cond[1].setImm(AArch64::TBZX);
break;
}
}
return false;
}
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
if (!isUncondBranchOpcode(I->getOpcode()) &&
!isCondBranchOpcode(I->getOpcode()))
return 0;
// Remove the branch.
I->eraseFromParent();
I = MBB.end();
if (I == MBB.begin()) {
if (BytesRemoved)
*BytesRemoved = 4;
return 1;
}
--I;
if (!isCondBranchOpcode(I->getOpcode())) {
if (BytesRemoved)
*BytesRemoved = 4;
return 1;
}
// Remove the branch.
I->eraseFromParent();
if (BytesRemoved)
*BytesRemoved = 8;
return 2;
}
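// Worked example (illustrative): if the block ends with
//   b.eq %bb.1
//   b    %bb.2
// the unconditional branch is erased first, then the conditional one, so the
// function returns 2 and reports 8 bytes removed.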
void AArch64InstrInfo::instantiateCondBranch(
MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
ArrayRef<MachineOperand> Cond) const {
if (Cond[0].getImm() != -1) {
// Regular Bcc
BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
} else {
// Folded compare-and-branch
// Note that we use addOperand instead of addReg to keep the flags.
const MachineInstrBuilder MIB =
BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
if (Cond.size() > 3)
MIB.addImm(Cond[3].getImm());
MIB.addMBB(TBB);
}
}
unsigned AArch64InstrInfo::insertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
if (!FBB) {
if (Cond.empty()) // Unconditional branch?
BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
else
instantiateCondBranch(MBB, DL, TBB, Cond);
if (BytesAdded)
*BytesAdded = 4;
return 1;
}
// Two-way conditional branch.
instantiateCondBranch(MBB, DL, TBB, Cond);
BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
if (BytesAdded)
*BytesAdded = 8;
return 2;
}
// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
while (Register::isVirtualRegister(VReg)) {
const MachineInstr *DefMI = MRI.getVRegDef(VReg);
if (!DefMI->isFullCopy())
return VReg;
VReg = DefMI->getOperand(1).getReg();
}
return VReg;
}
// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned *NewVReg = nullptr) {
VReg = removeCopies(MRI, VReg);
if (!Register::isVirtualRegister(VReg))
return 0;
bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
const MachineInstr *DefMI = MRI.getVRegDef(VReg);
unsigned Opc = 0;
unsigned SrcOpNum = 0;
switch (DefMI->getOpcode()) {
case AArch64::ADDSXri:
case AArch64::ADDSWri:
// if NZCV is used, do not fold.
if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
true) == -1)
return 0;
// fall-through to ADDXri and ADDWri.
[[fallthrough]];
case AArch64::ADDXri:
case AArch64::ADDWri:
// add x, 1 -> csinc.
if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
DefMI->getOperand(3).getImm() != 0)
return 0;
SrcOpNum = 1;
Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
break;
case AArch64::ORNXrr:
case AArch64::ORNWrr: {
// not x -> csinv, represented as orn dst, xzr, src.
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
SrcOpNum = 2;
Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
break;
}
case AArch64::SUBSXrr:
case AArch64::SUBSWrr:
// if NZCV is used, do not fold.
if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
true) == -1)
return 0;
// fall-through to SUBXrr and SUBWrr.
[[fallthrough]];
case AArch64::SUBXrr:
case AArch64::SUBWrr: {
// neg x -> csneg, represented as sub dst, xzr, src.
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
SrcOpNum = 2;
Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
break;
}
default:
return 0;
}
assert(Opc && SrcOpNum && "Missing parameters");
if (NewVReg)
*NewVReg = DefMI->getOperand(SrcOpNum).getReg();
return Opc;
}
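// Example (illustrative): if VReg is defined by
//   %v = ADDXri %src, 1, 0
// the function returns AArch64::CSINCXr and sets *NewVReg to %src, so the
// "add x, 1" can be folded into a csinc by the caller.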
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
Register DstReg, Register TrueReg,
Register FalseReg, int &CondCycles,
int &TrueCycles,
int &FalseCycles) const {
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
if (!RC)
return false;
// Also need to check the dest regclass, in case we're trying to optimize
// something like:
// %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
return false;
// Expanding cbz/tbz requires an extra cycle of latency on the condition.
unsigned ExtraCondLat = Cond.size() != 1;
// GPRs are handled by csel.
// FIXME: Fold in x+1, -x, and ~x when applicable.
if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
// Single-cycle csel, csinc, csinv, and csneg.
CondCycles = 1 + ExtraCondLat;
TrueCycles = FalseCycles = 1;
if (canFoldIntoCSel(MRI, TrueReg))
TrueCycles = 0;
else if (canFoldIntoCSel(MRI, FalseReg))
FalseCycles = 0;
return true;
}
// Scalar floating point is handled by fcsel.
// FIXME: Form fabs, fmin, and fmax when applicable.
if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
AArch64::FPR32RegClass.hasSubClassEq(RC)) {
CondCycles = 5 + ExtraCondLat;
TrueCycles = FalseCycles = 2;
return true;
}
// Can't do vectors.
return false;
}
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register DstReg,
ArrayRef<MachineOperand> Cond,
Register TrueReg, Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// Parse the condition code, see parseCondBranch() above.
AArch64CC::CondCode CC;
switch (Cond.size()) {
default:
llvm_unreachable("Unknown condition opcode in Cond");
case 1: // b.cc
CC = AArch64CC::CondCode(Cond[0].getImm());
break;
case 3: { // cbz/cbnz
// We must insert a compare against 0.
bool Is64Bit;
switch (Cond[1].getImm()) {
default:
llvm_unreachable("Unknown branch opcode in Cond");
case AArch64::CBZW:
Is64Bit = false;
CC = AArch64CC::EQ;
break;
case AArch64::CBZX:
Is64Bit = true;
CC = AArch64CC::EQ;
break;
case AArch64::CBNZW:
Is64Bit = false;
CC = AArch64CC::NE;
break;
case AArch64::CBNZX:
Is64Bit = true;
CC = AArch64CC::NE;
break;
}
Register SrcReg = Cond[2].getReg();
if (Is64Bit) {
// cmp reg, #0 is actually subs xzr, reg, #0.
MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
.addReg(SrcReg)
.addImm(0)
.addImm(0);
} else {
MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
.addReg(SrcReg)
.addImm(0)
.addImm(0);
}
break;
}
case 4: { // tbz/tbnz
// We must insert a tst instruction.
switch (Cond[1].getImm()) {
default:
llvm_unreachable("Unknown branch opcode in Cond");
case AArch64::TBZW:
case AArch64::TBZX:
CC = AArch64CC::EQ;
break;
case AArch64::TBNZW:
case AArch64::TBNZX:
CC = AArch64CC::NE;
break;
}
// cmp reg, #foo is actually ands xzr, reg, #1<<foo.
if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
.addReg(Cond[2].getReg())
.addImm(
AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
else
BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
.addReg(Cond[2].getReg())
.addImm(
AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
break;
}
}
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
bool TryFold = false;
if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
RC = &AArch64::GPR64RegClass;
Opc = AArch64::CSELXr;
TryFold = true;
} else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
RC = &AArch64::GPR32RegClass;
Opc = AArch64::CSELWr;
TryFold = true;
} else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FCSELDrrr;
} else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
RC = &AArch64::FPR32RegClass;
Opc = AArch64::FCSELSrrr;
}
assert(RC && "Unsupported regclass");
// Try folding simple instructions into the csel.
if (TryFold) {
unsigned NewVReg = 0;
unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
if (FoldedOpc) {
// The folded opcodes csinc, csinv and csneg apply the operation to
// FalseReg, so we need to invert the condition.
CC = AArch64CC::getInvertedCondCode(CC);
TrueReg = FalseReg;
} else
FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
// Fold the operation. Leave any dead instructions for DCE to clean up.
if (FoldedOpc) {
FalseReg = NewVReg;
Opc = FoldedOpc;
// This extends the live range of NewVReg.
MRI.clearKillFlags(NewVReg);
}
}
// Pull all virtual registers into the appropriate class.
MRI.constrainRegClass(TrueReg, RC);
MRI.constrainRegClass(FalseReg, RC);
// Insert the csel.
BuildMI(MBB, I, DL, get(Opc), DstReg)
.addReg(TrueReg)
.addReg(FalseReg)
.addImm(CC);
}
// Return true if Imm can be loaded into a register by a "cheap" sequence of
// instructions. For now, "cheap" means at most two instructions.
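// For example (illustrative), 0x1234 is materialized by a single MOVZ and is
// cheap, whereas 0x123456789abcdef0 expands to a MOVZ plus three MOVKs and
// is not.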
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
if (BitSize == 32)
return true;
assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
return Is.size() <= 2;
}
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
if (Subtarget.hasExynosCheapAsMoveHandling()) {
if (isExynosCheapAsMove(MI))
return true;
return MI.isAsCheapAsAMove();
}
switch (MI.getOpcode()) {
default:
return MI.isAsCheapAsAMove();
case AArch64::ADDWrs:
case AArch64::ADDXrs:
case AArch64::SUBWrs:
case AArch64::SUBXrs:
return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
// If MOVi32imm or MOVi64imm can be expanded into ORRWri or
// ORRXri, it is as cheap as MOV.
// Likewise if it can be expanded to MOVZ/MOVN/MOVK.
case AArch64::MOVi32imm:
return isCheapImmediate(MI, 32);
case AArch64::MOVi64imm:
return isCheapImmediate(MI, 64);
}
}
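/// Return true if \p MI uses a shifted/extended operand or a register-offset
/// addressing mode that is fast on Falkor.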
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::ADDWrs:
case AArch64::ADDXrs:
case AArch64::ADDSWrs:
case AArch64::ADDSXrs: {
unsigned Imm = MI.getOperand(3).getImm();
unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
if (ShiftVal == 0)
return true;
return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
}
case AArch64::ADDWrx:
case AArch64::ADDXrx:
case AArch64::ADDXrx64:
case AArch64::ADDSWrx:
case AArch64::ADDSXrx:
case AArch64::ADDSXrx64: {
unsigned Imm = MI.getOperand(3).getImm();
switch (AArch64_AM::getArithExtendType(Imm)) {
default:
return false;
case AArch64_AM::UXTB:
case AArch64_AM::UXTH:
case AArch64_AM::UXTW:
case AArch64_AM::UXTX:
return AArch64_AM::getArithShiftValue(Imm) <= 4;
}
}
case AArch64::SUBWrs:
case AArch64::SUBSWrs: {
unsigned Imm = MI.getOperand(3).getImm();
unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
return ShiftVal == 0 ||
(AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
}
case AArch64::SUBXrs:
case AArch64::SUBSXrs: {
unsigned Imm = MI.getOperand(3).getImm();
unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
return ShiftVal == 0 ||
(AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
}
case AArch64::SUBWrx:
case AArch64::SUBXrx:
case AArch64::SUBXrx64:
case AArch64::SUBSWrx:
case AArch64::SUBSXrx:
case AArch64::SUBSXrx64: {
unsigned Imm = MI.getOperand(3).getImm();
switch (AArch64_AM::getArithExtendType(Imm)) {
default:
return false;
case AArch64_AM::UXTB:
case AArch64_AM::UXTH:
case AArch64_AM::UXTW:
case AArch64_AM::UXTX:
return AArch64_AM::getArithShiftValue(Imm) == 0;
}
}
case AArch64::LDRBBroW:
case AArch64::LDRBBroX:
case AArch64::LDRBroW:
case AArch64::LDRBroX:
case AArch64::LDRDroW:
case AArch64::LDRDroX:
case AArch64::LDRHHroW:
case AArch64::LDRHHroX:
case AArch64::LDRHroW:
case AArch64::LDRHroX:
case AArch64::LDRQroW:
case AArch64::LDRQroX:
case AArch64::LDRSBWroW:
case AArch64::LDRSBWroX:
case AArch64::LDRSBXroW:
case AArch64::LDRSBXroX:
case AArch64::LDRSHWroW:
case AArch64::LDRSHWroX:
case AArch64::LDRSHXroW:
case AArch64::LDRSHXroX:
case AArch64::LDRSWroW:
case AArch64::LDRSWroX:
case AArch64::LDRSroW:
case AArch64::LDRSroX:
case AArch64::LDRWroW:
case AArch64::LDRWroX:
case AArch64::LDRXroW:
case AArch64::LDRXroX:
case AArch64::PRFMroW:
case AArch64::PRFMroX:
case AArch64::STRBBroW:
case AArch64::STRBBroX:
case AArch64::STRBroW:
case AArch64::STRBroX:
case AArch64::STRDroW:
case AArch64::STRDroX:
case AArch64::STRHHroW:
case AArch64::STRHHroX:
case AArch64::STRHroW:
case AArch64::STRHroX:
case AArch64::STRQroW:
case AArch64::STRQroX:
case AArch64::STRSroW:
case AArch64::STRSroX:
case AArch64::STRWroW:
case AArch64::STRWroX:
case AArch64::STRXroW:
case AArch64::STRXroX: {
unsigned IsSigned = MI.getOperand(3).getImm();
return !IsSigned;
}
}
}
bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
switch (Opc) {
default:
return false;
case AArch64::SEH_StackAlloc:
case AArch64::SEH_SaveFPLR:
case AArch64::SEH_SaveFPLR_X:
case AArch64::SEH_SaveReg:
case AArch64::SEH_SaveReg_X:
case AArch64::SEH_SaveRegP:
case AArch64::SEH_SaveRegP_X:
case AArch64::SEH_SaveFReg:
case AArch64::SEH_SaveFReg_X:
case AArch64::SEH_SaveFRegP:
case AArch64::SEH_SaveFRegP_X:
case AArch64::SEH_SetFP:
case AArch64::SEH_AddFP:
case AArch64::SEH_Nop:
case AArch64::SEH_PrologEnd:
case AArch64::SEH_EpilogStart:
case AArch64::SEH_EpilogEnd:
case AArch64::SEH_PACSignLR:
case AArch64::SEH_SaveAnyRegQP:
case AArch64::SEH_SaveAnyRegQPX:
return true;
}
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::SBFMXri: // aka sxtw
case AArch64::UBFMXri: // aka uxtw
// Check for the 32 -> 64 bit extension case; these instructions can do
// much more.
if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
return false;
// This is a signed or unsigned 32 -> 64 bit extension.
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
SubIdx = AArch64::sub_32;
return true;
}
}
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
const MachineInstr &MIa, const MachineInstr &MIb) const {
const TargetRegisterInfo *TRI = &getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
TypeSize WidthA(0, false), WidthB(0, false);
bool OffsetAIsScalable = false, OffsetBIsScalable = false;
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
// Retrieve the base register, the offset from the base and the width. The
// width is the size of the memory being loaded/stored (e.g. 1, 2, 4, 8). If
// the base registers are identical, and the offset of the lower memory
// access plus its width does not reach the offset of the higher memory
// access, then the two accesses do not overlap.
// If OffsetAIsScalable and OffsetBIsScalable are both true, they
// are assumed to have the same scale (vscale).
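// For example (illustrative), 'ldr x0, [x9]' (offset 0, width 8) and
// 'str x1, [x9, #8]' are trivially disjoint because 0 + 8 <= 8.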
if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
WidthA, TRI) &&
getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
WidthB, TRI)) {
if (BaseOpA->isIdenticalTo(*BaseOpB) &&
OffsetAIsScalable == OffsetBIsScalable) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowWidth.isScalable() == OffsetAIsScalable &&
LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
return true;
}
}
return false;
}
bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const {
if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
return true;
// Do not move an instruction that can be recognized as a branch target.
if (hasBTISemantics(MI))
return true;
switch (MI.getOpcode()) {
case AArch64::HINT:
// CSDB hints are scheduling barriers.
if (MI.getOperand(0).getImm() == 0x14)
return true;
break;
case AArch64::DSB:
case AArch64::ISB:
// DSB and ISB also are scheduling barriers.
return true;
case AArch64::MSRpstatesvcrImm1:
// SMSTART and SMSTOP are also scheduling barriers.
return true;
default:;
}
if (isSEHInstruction(MI))
return true;
auto Next = std::next(MI.getIterator());
return Next != MBB->end() && Next->isCFIInstruction();
}
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
Register &SrcReg2, int64_t &CmpMask,
int64_t &CmpValue) const {
// The first operand can be a frame index where we'd normally expect a
// register.
assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
if (!MI.getOperand(1).isReg())
return false;
switch (MI.getOpcode()) {
default:
break;
case AArch64::PTEST_PP:
case AArch64::PTEST_PP_ANY:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = MI.getOperand(1).getReg();
// Not sure about the mask and value for now...
CmpMask = ~0;
CmpValue = 0;
return true;
case AArch64::SUBSWrr:
case AArch64::SUBSWrs:
case AArch64::SUBSWrx:
case AArch64::SUBSXrr:
case AArch64::SUBSXrs:
case AArch64::SUBSXrx:
case AArch64::ADDSWrr:
case AArch64::ADDSWrs:
case AArch64::ADDSWrx:
case AArch64::ADDSXrr:
case AArch64::ADDSXrs:
case AArch64::ADDSXrx:
// Replace SUBSWrr with SUBWrr if NZCV is not used.
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = MI.getOperand(2).getReg();
CmpMask = ~0;
CmpValue = 0;
return true;
case AArch64::SUBSWri:
case AArch64::ADDSWri:
case AArch64::SUBSXri:
case AArch64::ADDSXri:
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
CmpValue = MI.getOperand(2).getImm();
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
// ANDS does not use the same encoding scheme as the other xxxS
// instructions.
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
CmpValue = AArch64_AM::decodeLogicalImmediate(
MI.getOperand(2).getImm(),
MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
return true;
}
return false;
}
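/// Re-constrain the register classes of \p Instr's register operands to
/// satisfy the constraints of its current opcode, e.g. after the opcode has
/// been changed. \returns false if some operand cannot be constrained.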
static bool UpdateOperandRegClass(MachineInstr &Instr) {
MachineBasicBlock *MBB = Instr.getParent();
assert(MBB && "Can't get MachineBasicBlock here");
MachineFunction *MF = MBB->getParent();
assert(MF && "Can't get MachineFunction here");
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
++OpIdx) {
MachineOperand &MO = Instr.getOperand(OpIdx);
const TargetRegisterClass *OpRegCstraints =
Instr.getRegClassConstraint(OpIdx, TII, TRI);
// If there's no constraint, there's nothing to do.
if (!OpRegCstraints)
continue;
// If the operand is a frame index, there's nothing to do here.
// A frame index operand will resolve correctly during PEI.
if (MO.isFI())
continue;
assert(MO.isReg() &&
"Operand has register constraints without being a register!");
Register Reg = MO.getReg();
if (Reg.isPhysical()) {
if (!OpRegCstraints->contains(Reg))
return false;
} else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
!MRI->constrainRegClass(Reg, OpRegCstraints))
return false;
}
return true;
}
/// Return the opcode that does not set flags when possible; otherwise
/// return the original opcode. The caller is responsible for doing the
/// actual substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
// Don't convert all compare instructions, because for some the zero register
// encoding becomes the sp register.
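// For example, 'cmp w0, #5' is 'SUBSWri wzr, w0, #5'; the non-flag-setting
// SUBWri takes a GPR32sp destination, where register 31 encodes wsp rather
// than wzr, so such instructions must keep their S form.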
bool MIDefinesZeroReg = false;
if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
MIDefinesZeroReg = true;
switch (MI.getOpcode()) {
default:
return MI.getOpcode();
case AArch64::ADDSWrr:
return AArch64::ADDWrr;
case AArch64::ADDSWri:
return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
case AArch64::ADDSWrs:
return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
case AArch64::ADDSWrx:
return AArch64::ADDWrx;
case AArch64::ADDSXrr:
return AArch64::ADDXrr;
case AArch64::ADDSXri:
return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
case AArch64::ADDSXrs:
return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
case AArch64::ADDSXrx:
return AArch64::ADDXrx;
case AArch64::SUBSWrr:
return AArch64::SUBWrr;
case AArch64::SUBSWri:
return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
case AArch64::SUBSWrs:
return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
case AArch64::SUBSWrx:
return AArch64::SUBWrx;
case AArch64::SUBSXrr:
return AArch64::SUBXrr;
case AArch64::SUBSXri:
return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
case AArch64::SUBSXrs:
return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
case AArch64::SUBSXrx:
return AArch64::SUBXrx;
}
}
enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are in different blocks, the condition flags are
/// assumed to be accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
// Early exit if To is at the beginning of the BB.
if (To == To->getParent()->begin())
return true;
// Check whether the instructions are in the same basic block
// If not, assume the condition flags might get modified somewhere.
if (To->getParent() != From->getParent())
return true;
// From must be above To.
assert(std::any_of(
++To.getReverse(), To->getParent()->rend(),
[From](MachineInstr &MI) { return MI.getIterator() == From; }));
// We iterate backward starting at \p To until we hit \p From.
for (const MachineInstr &Instr :
instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
if (((AccessToCheck & AK_Write) &&
Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
return true;
}
return false;
}
std::optional<unsigned>
AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
MachineInstr *Pred,
const MachineRegisterInfo *MRI) const {
unsigned MaskOpcode = Mask->getOpcode();
unsigned PredOpcode = Pred->getOpcode();
bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
bool PredIsWhileLike = isWhileOpcode(PredOpcode);
if (PredIsWhileLike) {
// For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
// instruction and the condition is "any" since WHILEcc does an implicit
// PTEST(ALL, PG) check and PG is always a subset of ALL.
if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
return PredOpcode;
// For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
// redundant since WHILE performs an implicit PTEST with an all active
// mask.
if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
getElementSizeForOpcode(MaskOpcode) ==
getElementSizeForOpcode(PredOpcode))
return PredOpcode;
return {};
}
if (PredIsPTestLike) {
// For PTEST(PG, PG), PTEST is redundant when PG is the result of an
// instruction that sets the flags as PTEST would and the condition is
// "any" since PG is always a subset of the governing predicate of the
// ptest-like instruction.
if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
return PredOpcode;
// For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
// element size matches and either the PTEST_LIKE instruction uses
// the same all active mask or the condition is "any".
if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
getElementSizeForOpcode(MaskOpcode) ==
getElementSizeForOpcode(PredOpcode)) {
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
return PredOpcode;
}
// For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
// flags are set based on the same mask 'PG', but PTEST_LIKE must operate
// on 8-bit predicates like the PTEST. Otherwise, for instructions like
// compare that also support 16/32/64-bit predicates, the implicit PTEST
// performed by the compare could consider fewer lanes for these element
// sizes.
//
// For example, consider
//
// ptrue p0.b ; P0=1111-1111-1111-1111
// index z0.s, #0, #1 ; Z0=<0,1,2,3>
// index z1.s, #1, #1 ; Z1=<1,2,3,4>
// cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
// ; ^ last active
// ptest p0, p1.b ; P1=0001-0001-0001-0001
// ; ^ last active
//
// where the compare generates a canonical all active 32-bit predicate
// (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
// active flag, whereas the PTEST instruction with the same mask doesn't.
// For PTEST_ANY this doesn't apply as the flags in this case would be
// identical regardless of element size.
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
PTest->getOpcode() == AArch64::PTEST_PP_ANY))
return PredOpcode;
return {};
}
// If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
// opcode so the PTEST becomes redundant.
switch (PredOpcode) {
case AArch64::AND_PPzPP:
case AArch64::BIC_PPzPP:
case AArch64::EOR_PPzPP:
case AArch64::NAND_PPzPP:
case AArch64::NOR_PPzPP:
case AArch64::ORN_PPzPP:
case AArch64::ORR_PPzPP:
case AArch64::BRKA_PPzP:
case AArch64::BRKPA_PPzPP:
case AArch64::BRKB_PPzP:
case AArch64::BRKPB_PPzPP:
case AArch64::RDFFR_PPz: {
// Check to see if our mask is the same. If not, the resulting flag bits
// may be different and we can't remove the ptest.
auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
if (Mask != PredMask)
return {};
break;
}
case AArch64::BRKN_PPzP: {
// BRKN uses an all active implicit mask to set flags unlike the other
// flag-setting instructions.
// PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
if ((MaskOpcode != AArch64::PTRUE_B) ||
(Mask->getOperand(1).getImm() != 31))
return {};
break;
}
case AArch64::PTRUE_B:
// PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
break;
default:
// Bail out if we don't recognize the input
return {};
}
return convertToFlagSettingOpc(PredOpcode);
}
/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
/// operation that could set the flags in an identical manner.
bool AArch64InstrInfo::optimizePTestInstr(
MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
const MachineRegisterInfo *MRI) const {
auto *Mask = MRI->getUniqueVRegDef(MaskReg);
auto *Pred = MRI->getUniqueVRegDef(PredReg);
unsigned PredOpcode = Pred->getOpcode();
auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
if (!NewOp)
return false;
const TargetRegisterInfo *TRI = &getRegisterInfo();
// If another instruction between Pred and PTest accesses flags, don't remove
// the ptest or update the earlier instruction to modify them.
if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
return false;
// If we pass all the checks, it's safe to remove the PTEST and use the flags
// as they are prior to PTEST. Sometimes this requires the tested PTEST
// operand to be replaced with an equivalent instruction that also sets the
// flags.
PTest->eraseFromParent();
if (*NewOp != PredOpcode) {
Pred->setDesc(get(*NewOp));
bool succeeded = UpdateOperandRegClass(*Pred);
(void)succeeded;
assert(succeeded && "Operands have incompatible register classes!");
Pred->addRegisterDefined(AArch64::NZCV, TRI);
}
// Ensure that the flags def is live.
if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
unsigned i = 0, e = Pred->getNumOperands();
for (; i != e; ++i) {
MachineOperand &MO = Pred->getOperand(i);
if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
MO.setIsDead(false);
break;
}
}
}
return true;
}
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be a pure compare
/// instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into a non-flag-setting version if NZCV is unused.
/// 2. Remove CmpInstr if an earlier instruction produces the needed
/// condition code, or can be converted into an instruction that does.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
int64_t CmpValue, const MachineRegisterInfo *MRI) const {
assert(CmpInstr.getParent());
assert(MRI);
// Replace SUBSWrr with SUBWrr if NZCV is not used.
int DeadNZCVIdx =
CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
if (DeadNZCVIdx != -1) {
if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
CmpInstr.eraseFromParent();
return true;
}
unsigned Opc = CmpInstr.getOpcode();
unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
if (NewOpc == Opc)
return false;
const MCInstrDesc &MCID = get(NewOpc);
CmpInstr.setDesc(MCID);
CmpInstr.removeOperand(DeadNZCVIdx);
bool succeeded = UpdateOperandRegClass(CmpInstr);
(void)succeeded;
assert(succeeded && "Some operands reg class are incompatible!");
return true;
}
if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
if (SrcReg2 != 0)
return false;
// CmpInstr is a compare instruction if the destination register is not used.
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false;
if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
return true;
return (CmpValue == 0 || CmpValue == 1) &&
removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}
/// Get opcode of S version of Instr.
/// If Instr is S version its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
/// or we are not interested in it.
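/// For example (illustrative), sForm(AArch64::ADDWrr) is AArch64::ADDSWrr,
/// which lets a sequence such as
/// \code
/// add w8, w0, w1
/// cmp w8, #0
/// \endcode
/// drop the compare by rewriting the add as 'adds w8, w0, w1'.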
static unsigned sForm(MachineInstr &Instr) {
switch (Instr.getOpcode()) {
default:
return AArch64::INSTRUCTION_LIST_END;
case AArch64::ADDSWrr:
case AArch64::ADDSWri:
case AArch64::ADDSXrr:
case AArch64::ADDSXri:
case AArch64::SUBSWrr:
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
return Instr.getOpcode();
case AArch64::ADDWrr:
return AArch64::ADDSWrr;
case AArch64::ADDWri:
return AArch64::ADDSWri;
case AArch64::ADDXrr:
return AArch64::ADDSXrr;
case AArch64::ADDXri:
return AArch64::ADDSXri;
case AArch64::ADCWr:
return AArch64::ADCSWr;
case AArch64::ADCXr:
return AArch64::ADCSXr;
case AArch64::SUBWrr:
return AArch64::SUBSWrr;
case AArch64::SUBWri:
return AArch64::SUBSWri;
case AArch64::SUBXrr:
return AArch64::SUBSXrr;
case AArch64::SUBXri:
return AArch64::SUBSXri;
case AArch64::SBCWr:
return AArch64::SBCSWr;
case AArch64::SBCXr:
return AArch64::SBCSXr;
case AArch64::ANDWri:
return AArch64::ANDSWri;
case AArch64::ANDXri:
return AArch64::ANDSXri;
}
}
/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
for (auto *BB : MBB->successors())
if (BB->isLiveIn(AArch64::NZCV))
return true;
return false;
}
/// \returns The condition code operand index for \p Instr if it is a branch
/// or select and -1 otherwise.
static int
findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
switch (Instr.getOpcode()) {
default:
return -1;
case AArch64::Bcc: {
int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
assert(Idx >= 2);
return Idx - 2;
}
case AArch64::CSINVWr:
case AArch64::CSINVXr:
case AArch64::CSINCWr:
case AArch64::CSINCXr:
case AArch64::CSELWr:
case AArch64::CSELXr:
case AArch64::CSNEGWr:
case AArch64::CSNEGXr:
case AArch64::FCSELSrrr:
case AArch64::FCSELDrrr: {
int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
assert(Idx >= 1);
return Idx - 1;
}
}
}
/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
Instr.getOperand(CCIdx).getImm())
: AArch64CC::Invalid;
}
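/// Return the set of NZCV flags read by condition code \p CC. For example,
/// EQ and NE read only Z, while GT and LE read Z, N and V.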
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
assert(CC != AArch64CC::Invalid);
UsedNZCV UsedFlags;
switch (CC) {
default:
break;
case AArch64CC::EQ: // Z set
case AArch64CC::NE: // Z clear
UsedFlags.Z = true;
break;
case AArch64CC::HI: // Z clear and C set
case AArch64CC::LS: // Z set or C clear
UsedFlags.Z = true;
[[fallthrough]];
case AArch64CC::HS: // C set
case AArch64CC::LO: // C clear
UsedFlags.C = true;
break;
case AArch64CC::MI: // N set
case AArch64CC::PL: // N clear
UsedFlags.N = true;
break;
case AArch64CC::VS: // V set
case AArch64CC::VC: // V clear
UsedFlags.V = true;
break;
case AArch64CC::GT: // Z clear, N and V the same
case AArch64CC::LE: // Z set, N and V differ
UsedFlags.Z = true;
[[fallthrough]];
case AArch64CC::GE: // N and V the same
case AArch64CC::LT: // N and V differ
UsedFlags.N = true;
UsedFlags.V = true;
break;
}
return UsedFlags;
}
/// \returns The condition flags used after \p CmpInstr in its MachineBB if the
/// NZCV flags are not alive in the successors of the block containing both
/// \p CmpInstr and \p MI.
/// \returns std::nullopt otherwise.
///
/// Collects the instructions using those flags in \p CCUseInstrs if provided.
std::optional<UsedNZCV>
llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
const TargetRegisterInfo &TRI,
SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
MachineBasicBlock *CmpParent = CmpInstr.getParent();
if (MI.getParent() != CmpParent)
return std::nullopt;
if (areCFlagsAliveInSuccessors(CmpParent))
return std::nullopt;
UsedNZCV NZCVUsedAfterCmp;
for (MachineInstr &Instr : instructionsWithoutDebug(
std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
return std::nullopt;
NZCVUsedAfterCmp |= getUsedNZCV(CC);
if (CCUseInstrs)
CCUseInstrs->push_back(&Instr);
}
if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
break;
}
return NZCVUsedAfterCmp;
}
static bool isADDSRegImm(unsigned Opcode) {
return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}
static bool isSUBSRegImm(unsigned Opcode) {
return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}
/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
/// MI and CmpInstr
/// or if MI opcode is not the S form there must be neither defs of flags
/// nor uses of flags between MI and CmpInstr.
/// - and, the C flag is not used after CmpInstr
/// - and, either the V flag is not used after CmpInstr, or MI produces a
/// poison value on signed overflow (it has the no-signed-wrap flag).
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
const TargetRegisterInfo &TRI) {
// NOTE: this assertion guarantees that MI.getOpcode() is an add or a
// subtraction that may or may not set flags.
assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
const unsigned CmpOpcode = CmpInstr.getOpcode();
if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
return false;
assert((CmpInstr.getOperand(2).isImm() &&
CmpInstr.getOperand(2).getImm() == 0) &&
"Caller guarantees that CmpInstr compares with constant 0");
std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
if (!NZVCUsed || NZVCUsed->C)
return false;
// CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
// '%vreg = add ...' or '%vreg = sub ...'.
// Condition flag V is used to indicate signed overflow.
// 1) MI and CmpInstr set N and V to the same value.
// 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
// signed overflow occurs, so CmpInstr could still be simplified away.
if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
return false;
AccessKind AccessToCheck = AK_Write;
if (sForm(MI) != MI.getOpcode())
AccessToCheck = AK_All;
return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
}
/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
MachineInstr &CmpInstr, unsigned SrcReg,
const MachineRegisterInfo &MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
if (!MI)
return false;
const TargetRegisterInfo &TRI = getRegisterInfo();
unsigned NewOpc = sForm(*MI);
if (NewOpc == AArch64::INSTRUCTION_LIST_END)
return false;
if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
return false;
// Update the instruction to set NZCV.
MI->setDesc(get(NewOpc));
CmpInstr.eraseFromParent();
bool succeeded = UpdateOperandRegClass(*MI);
(void)succeeded;
assert(succeeded && "Some operands reg class are incompatible!");
MI->addRegisterDefined(AArch64::NZCV, &TRI);
return true;
}
/// \returns True if \p CmpInstr can be removed.
///
/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
/// codes used in \p CCUseInstrs must be inverted.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
int CmpValue, const TargetRegisterInfo &TRI,
SmallVectorImpl<MachineInstr *> &CCUseInstrs,
bool &IsInvertCC) {
assert((CmpValue == 0 || CmpValue == 1) &&
"Only comparisons to 0 or 1 considered for removal!");
// MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
unsigned MIOpc = MI.getOpcode();
if (MIOpc == AArch64::CSINCWr) {
if (MI.getOperand(1).getReg() != AArch64::WZR ||
MI.getOperand(2).getReg() != AArch64::WZR)
return false;
} else if (MIOpc == AArch64::CSINCXr) {
if (MI.getOperand(1).getReg() != AArch64::XZR ||
MI.getOperand(2).getReg() != AArch64::XZR)
return false;
} else {
return false;
}
AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
if (MICC == AArch64CC::Invalid)
return false;
// NZCV needs to be defined
if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
return false;
// CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
const unsigned CmpOpcode = CmpInstr.getOpcode();
bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
if (CmpValue && !IsSubsRegImm)
return false;
if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
return false;
// MI conditions allowed: eq, ne, mi, pl
UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
if (MIUsedNZCV.C || MIUsedNZCV.V)
return false;
std::optional<UsedNZCV> NZCVUsedAfterCmp =
examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
// Condition flags are not used in CmpInstr basic block successors and only
// Z or N flags allowed to be used after CmpInstr within its basic block
if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
return false;
// Z or N flag used after CmpInstr must correspond to the flag used in MI
if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
(MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
return false;
// If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne
if (MIUsedNZCV.N && !CmpValue)
return false;
// There must be no defs of flags between MI and CmpInstr
if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
return false;
// Condition code is inverted in the following cases:
// 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
// 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
(!CmpValue && MICC == AArch64CC::NE);
return true;
}
/// Remove comparison in csinc-cmp sequence
///
/// Examples:
/// 1. \code
/// csinc w9, wzr, wzr, ne
/// cmp w9, #0
/// b.eq
/// \endcode
/// to
/// \code
/// csinc w9, wzr, wzr, ne
/// b.ne
/// \endcode
///
/// 2. \code
/// csinc x2, xzr, xzr, mi
/// cmp x2, #1
/// b.pl
/// \endcode
/// to
/// \code
/// csinc x2, xzr, xzr, mi
/// b.pl
/// \endcode
///
/// \param CmpInstr comparison instruction
/// \return True when comparison removed
bool AArch64InstrInfo::removeCmpToZeroOrOne(
MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
const MachineRegisterInfo &MRI) const {
MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
if (!MI)
return false;
const TargetRegisterInfo &TRI = getRegisterInfo();
SmallVector<MachineInstr *, 4> CCUseInstrs;
bool IsInvertCC = false;
if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
IsInvertCC))
return false;
// Make transformation
CmpInstr.eraseFromParent();
if (IsInvertCC) {
// Invert condition codes in CmpInstr CC users
for (MachineInstr *CCUseInstr : CCUseInstrs) {
int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
assert(Idx >= 0 && "Unexpected instruction using CC.");
MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
CCOperand.setImm(CCUse);
}
}
return true;
}
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
MI.getOpcode() != AArch64::CATCHRET)
return false;
MachineBasicBlock &MBB = *MI.getParent();
auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
auto TRI = Subtarget.getRegisterInfo();
DebugLoc DL = MI.getDebugLoc();
if (MI.getOpcode() == AArch64::CATCHRET) {
// Skip to the first instruction before the epilog.
const TargetInstrInfo *TII =
MBB.getParent()->getSubtarget().getInstrInfo();
MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
auto MBBI = MachineBasicBlock::iterator(MI);
MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
FirstEpilogSEH != MBB.begin())
FirstEpilogSEH = std::prev(FirstEpilogSEH);
if (FirstEpilogSEH != MBB.begin())
FirstEpilogSEH = std::next(FirstEpilogSEH);
BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
.addReg(AArch64::X0, RegState::Define)
.addMBB(TargetMBB);
BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
.addReg(AArch64::X0, RegState::Define)
.addReg(AArch64::X0)
.addMBB(TargetMBB)
.addImm(0);
return true;
}
Register Reg = MI.getOperand(0).getReg();
Module &M = *MBB.getParent()->getFunction().getParent();
if (M.getStackProtectorGuard() == "sysreg") {
const AArch64SysReg::SysReg *SrcReg =
AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
if (!SrcReg)
report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
// mrs xN, sysreg
BuildMI(MBB, MI, DL, get(AArch64::MRS))
.addDef(Reg, RegState::Renamable)
.addImm(SrcReg->Encoding);
int Offset = M.getStackProtectorGuardOffset();
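// For example (illustrative), with a guard offset of 16 the expansion
// below is 'mrs xN, <guard sysreg>' followed by 'ldr xN, [xN, #16]'.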
if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
// ldr xN, [xN, #offset]
BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
.addImm(Offset / 8);
} else if (Offset >= -256 && Offset <= 255) {
// ldur xN, [xN, #offset]
BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
.addImm(Offset);
} else if (Offset >= -4095 && Offset <= 4095) {
if (Offset > 0) {
// add xN, xN, #offset
BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
.addImm(Offset)
.addImm(0);
} else {
// sub xN, xN, #offset
BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
.addImm(-Offset)
.addImm(0);
}
// ldr xN, [xN]
BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
.addImm(0);
} else {
// Cases that are larger than +/- 4095 and not a multiple of 8, or larger
// than 32760.
// It might be nice to use AArch64::MOVi32imm here, which would get
// expanded in PreSched2 after PostRA, but our lone scratch Reg already
// contains the MRS result. findScratchNonCalleeSaveRegister() in
// AArch64FrameLowering might help us find such a scratch register
// though. If we failed to find a scratch register, we could emit a
// stream of add instructions to build up the immediate. Or, we could try
// to insert a AArch64::MOVi32imm before register allocation so that we
// didn't need to scavenge for a scratch register.
report_fatal_error("Unable to encode Stack Protector Guard Offset");
}
MBB.erase(MI);
return true;
}
const GlobalValue *GV =
cast<GlobalValue>((*MI.memoperands_begin())->getValue());
const TargetMachine &TM = MBB.getParent()->getTarget();
unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
const unsigned char MO_NC = AArch64II::MO_NC;
if ((OpFlags & AArch64II::MO_GOT) != 0) {
BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
.addGlobalAddress(GV, 0, OpFlags);
if (Subtarget.isTargetILP32()) {
unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
.addDef(Reg32, RegState::Dead)
.addUse(Reg, RegState::Kill)
.addImm(0)
.addMemOperand(*MI.memoperands_begin())
.addDef(Reg, RegState::Implicit);
} else {
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
.addMemOperand(*MI.memoperands_begin());
}
} else if (TM.getCodeModel() == CodeModel::Large) {
assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
.addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
.addImm(0);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
.addImm(16);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
.addImm(32);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, AArch64II::MO_G3)
.addImm(48);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
.addMemOperand(*MI.memoperands_begin());
} else if (TM.getCodeModel() == CodeModel::Tiny) {
BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
.addGlobalAddress(GV, 0, OpFlags);
} else {
BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
.addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
if (Subtarget.isTargetILP32()) {
unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
.addDef(Reg32, RegState::Dead)
.addUse(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, LoFlags)
.addMemOperand(*MI.memoperands_begin())
.addDef(Reg, RegState::Implicit);
} else {
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, LoFlags)
.addMemOperand(*MI.memoperands_begin());
}
}
MBB.erase(MI);
return true;
}
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
break;
case AArch64::MOVZWi:
case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
assert(MI.getDesc().getNumOperands() == 3 &&
MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
return true;
}
break;
case AArch64::ANDWri: // and Rd, Rzr, #imm
return MI.getOperand(1).getReg() == AArch64::WZR;
case AArch64::ANDXri:
return MI.getOperand(1).getReg() == AArch64::XZR;
case TargetOpcode::COPY:
return MI.getOperand(1).getReg() == AArch64::WZR;
}
return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
break;
case TargetOpcode::COPY: {
// GPR32/GPR64 copies will be lowered to ORRWrs/ORRXrs
Register DstReg = MI.getOperand(0).getReg();
return (AArch64::GPR32RegClass.contains(DstReg) ||
AArch64::GPR64RegClass.contains(DstReg));
}
case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
if (MI.getOperand(1).getReg() == AArch64::XZR) {
assert(MI.getDesc().getNumOperands() == 4 &&
MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
return true;
}
break;
case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
if (MI.getOperand(2).getImm() == 0) {
assert(MI.getDesc().getNumOperands() == 4 &&
MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
return true;
}
break;
}
return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
break;
case TargetOpcode::COPY: {
Register DstReg = MI.getOperand(0).getReg();
return AArch64::FPR128RegClass.contains(DstReg);
}
case AArch64::ORRv16i8:
if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
"invalid ORRv16i8 operands");
return true;
}
break;
}
return false;
}
Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
default:
break;
case AArch64::LDRWui:
case AArch64::LDRXui:
case AArch64::LDRBui:
case AArch64::LDRHui:
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
case AArch64::LDR_PXI:
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
}
return 0;
}
Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
default:
break;
case AArch64::STRWui:
case AArch64::STRXui:
case AArch64::STRBui:
case AArch64::STRHui:
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
case AArch64::STR_PXI:
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
}
return 0;
}
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
return MMO->getFlags() & MOSuppressPair;
});
}
/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
if (MI.memoperands_empty())
return;
(*MI.memoperands_begin())->setFlags(MOSuppressPair);
}
/// Check all MachineMemOperands for a hint that the load/store is strided.
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
return MMO->getFlags() & MOStridedAccess;
});
}
bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64::STURSi:
case AArch64::STRSpre:
case AArch64::STURDi:
case AArch64::STRDpre:
case AArch64::STURQi:
case AArch64::STRQpre:
case AArch64::STURBBi:
case AArch64::STURHHi:
case AArch64::STURWi:
case AArch64::STRWpre:
case AArch64::STURXi:
case AArch64::STRXpre:
case AArch64::LDURSi:
case AArch64::LDRSpre:
case AArch64::LDURDi:
case AArch64::LDRDpre:
case AArch64::LDURQi:
case AArch64::LDRQpre:
case AArch64::LDURWi:
case AArch64::LDRWpre:
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::LDRSWpre:
case AArch64::LDURSWi:
case AArch64::LDURHHi:
case AArch64::LDURBBi:
case AArch64::LDURSBWi:
case AArch64::LDURSHWi:
return true;
}
}
std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
switch (Opc) {
default: return {};
case AArch64::PRFMui: return AArch64::PRFUMi;
case AArch64::LDRXui: return AArch64::LDURXi;
case AArch64::LDRWui: return AArch64::LDURWi;
case AArch64::LDRBui: return AArch64::LDURBi;
case AArch64::LDRHui: return AArch64::LDURHi;
case AArch64::LDRSui: return AArch64::LDURSi;
case AArch64::LDRDui: return AArch64::LDURDi;
case AArch64::LDRQui: return AArch64::LDURQi;
case AArch64::LDRBBui: return AArch64::LDURBBi;
case AArch64::LDRHHui: return AArch64::LDURHHi;
case AArch64::LDRSBXui: return AArch64::LDURSBXi;
case AArch64::LDRSBWui: return AArch64::LDURSBWi;
case AArch64::LDRSHXui: return AArch64::LDURSHXi;
case AArch64::LDRSHWui: return AArch64::LDURSHWi;
case AArch64::LDRSWui: return AArch64::LDURSWi;
case AArch64::STRXui: return AArch64::STURXi;
case AArch64::STRWui: return AArch64::STURWi;
case AArch64::STRBui: return AArch64::STURBi;
case AArch64::STRHui: return AArch64::STURHi;
case AArch64::STRSui: return AArch64::STURSi;
case AArch64::STRDui: return AArch64::STURDi;
case AArch64::STRQui: return AArch64::STURQi;
case AArch64::STRBBui: return AArch64::STURBBi;
case AArch64::STRHHui: return AArch64::STURHHi;
}
}
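/// Return the operand index of the immediate offset for the given load/store
/// opcode: 2 for most instructions, 3 for paired and many SVE forms.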
unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
switch (Opc) {
default:
return 2;
case AArch64::LDPXi:
case AArch64::LDPDi:
case AArch64::STPXi:
case AArch64::STPDi:
case AArch64::LDNPXi:
case AArch64::LDNPDi:
case AArch64::STNPXi:
case AArch64::STNPDi:
case AArch64::LDPQi:
case AArch64::STPQi:
case AArch64::LDNPQi:
case AArch64::STNPQi:
case AArch64::LDPWi:
case AArch64::LDPSi:
case AArch64::STPWi:
case AArch64::STPSi:
case AArch64::LDNPWi:
case AArch64::LDNPSi:
case AArch64::STNPWi:
case AArch64::STNPSi:
case AArch64::LDG:
case AArch64::STGPi:
case AArch64::LD1B_IMM:
case AArch64::LD1B_H_IMM:
case AArch64::LD1B_S_IMM:
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_H_IMM:
case AArch64::LD1SB_S_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::LD1H_IMM:
case AArch64::LD1H_S_IMM:
case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_S_IMM:
case AArch64::LD1SH_D_IMM:
case AArch64::LD1W_IMM:
case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM:
case AArch64::LD1D_IMM:
case AArch64::LD2B_IMM:
case AArch64::LD2H_IMM:
case AArch64::LD2W_IMM:
case AArch64::LD2D_IMM:
case AArch64::LD3B_IMM:
case AArch64::LD3H_IMM:
case AArch64::LD3W_IMM:
case AArch64::LD3D_IMM:
case AArch64::LD4B_IMM:
case AArch64::LD4H_IMM:
case AArch64::LD4W_IMM:
case AArch64::LD4D_IMM:
case AArch64::ST1B_IMM:
case AArch64::ST1B_H_IMM:
case AArch64::ST1B_S_IMM:
case AArch64::ST1B_D_IMM:
case AArch64::ST1H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1H_D_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1W_D_IMM:
case AArch64::ST1D_IMM:
case AArch64::ST2B_IMM:
case AArch64::ST2H_IMM:
case AArch64::ST2W_IMM:
case AArch64::ST2D_IMM:
case AArch64::ST3B_IMM:
case AArch64::ST3H_IMM:
case AArch64::ST3W_IMM:
case AArch64::ST3D_IMM:
case AArch64::ST4B_IMM:
case AArch64::ST4H_IMM:
case AArch64::ST4W_IMM:
case AArch64::ST4D_IMM:
case AArch64::LD1RB_IMM:
case AArch64::LD1RB_H_IMM:
case AArch64::LD1RB_S_IMM:
case AArch64::LD1RB_D_IMM:
case AArch64::LD1RSB_H_IMM:
case AArch64::LD1RSB_S_IMM:
case AArch64::LD1RSB_D_IMM:
case AArch64::LD1RH_IMM:
case AArch64::LD1RH_S_IMM:
case AArch64::LD1RH_D_IMM:
case AArch64::LD1RSH_S_IMM:
case AArch64::LD1RSH_D_IMM:
case AArch64::LD1RW_IMM:
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
case AArch64::LD1RD_IMM:
case AArch64::LDNT1B_ZRI:
case AArch64::LDNT1H_ZRI:
case AArch64::LDNT1W_ZRI:
case AArch64::LDNT1D_ZRI:
case AArch64::STNT1B_ZRI:
case AArch64::STNT1H_ZRI:
case AArch64::STNT1W_ZRI:
case AArch64::STNT1D_ZRI:
case AArch64::LDNF1B_IMM:
case AArch64::LDNF1B_H_IMM:
case AArch64::LDNF1B_S_IMM:
case AArch64::LDNF1B_D_IMM:
case AArch64::LDNF1SB_H_IMM:
case AArch64::LDNF1SB_S_IMM:
case AArch64::LDNF1SB_D_IMM:
case AArch64::LDNF1H_IMM:
case AArch64::LDNF1H_S_IMM:
case AArch64::LDNF1H_D_IMM:
case AArch64::LDNF1SH_S_IMM:
case AArch64::LDNF1SH_D_IMM:
case AArch64::LDNF1W_IMM:
case AArch64::LDNF1W_D_IMM:
case AArch64::LDNF1SW_D_IMM:
case AArch64::LDNF1D_IMM:
return 3;
case AArch64::ADDG:
case AArch64::STGi:
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
return 2;
}
}
bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
// Scaled instructions.
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
case AArch64::STRXui:
case AArch64::STRWui:
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
case AArch64::LDRXui:
case AArch64::LDRWui:
case AArch64::LDRSWui:
// Unscaled instructions.
case AArch64::STURSi:
case AArch64::STRSpre:
case AArch64::STURDi:
case AArch64::STRDpre:
case AArch64::STURQi:
case AArch64::STRQpre:
case AArch64::STURWi:
case AArch64::STRWpre:
case AArch64::STURXi:
case AArch64::STRXpre:
case AArch64::LDURSi:
case AArch64::LDRSpre:
case AArch64::LDURDi:
case AArch64::LDRDpre:
case AArch64::LDURQi:
case AArch64::LDRQpre:
case AArch64::LDURWi:
case AArch64::LDRWpre:
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::LDURSWi:
case AArch64::LDRSWpre:
return true;
}
}
bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
assert((!MI.isCall() || !MI.isReturn()) &&
"Unexpected instruction - was a new tail call opcode introduced?");
return false;
case AArch64::TCRETURNdi:
case AArch64::TCRETURNri:
case AArch64::TCRETURNrix16x17:
case AArch64::TCRETURNrix17:
case AArch64::TCRETURNrinotx16:
case AArch64::TCRETURNriALL:
case AArch64::AUTH_TCRETURN:
case AArch64::AUTH_TCRETURN_BTI:
return true;
}
}
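/// Return the flag-setting (S) variant of \p Opc, e.g. ADDWri -> ADDSWri.
/// Fails with llvm_unreachable if \p Opc has no flag-setting equivalent.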
unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no flag setting equivalent!");
// 32-bit cases:
case AArch64::ADDWri:
return AArch64::ADDSWri;
case AArch64::ADDWrr:
return AArch64::ADDSWrr;
case AArch64::ADDWrs:
return AArch64::ADDSWrs;
case AArch64::ADDWrx:
return AArch64::ADDSWrx;
case AArch64::ANDWri:
return AArch64::ANDSWri;
case AArch64::ANDWrr:
return AArch64::ANDSWrr;
case AArch64::ANDWrs:
return AArch64::ANDSWrs;
case AArch64::BICWrr:
return AArch64::BICSWrr;
case AArch64::BICWrs:
return AArch64::BICSWrs;
case AArch64::SUBWri:
return AArch64::SUBSWri;
case AArch64::SUBWrr:
return AArch64::SUBSWrr;
case AArch64::SUBWrs:
return AArch64::SUBSWrs;
case AArch64::SUBWrx:
return AArch64::SUBSWrx;
// 64-bit cases:
case AArch64::ADDXri:
return AArch64::ADDSXri;
case AArch64::ADDXrr:
return AArch64::ADDSXrr;
case AArch64::ADDXrs:
return AArch64::ADDSXrs;
case AArch64::ADDXrx:
return AArch64::ADDSXrx;
case AArch64::ANDXri:
return AArch64::ANDSXri;
case AArch64::ANDXrr:
return AArch64::ANDSXrr;
case AArch64::ANDXrs:
return AArch64::ANDSXrs;
case AArch64::BICXrr:
return AArch64::BICSXrr;
case AArch64::BICXrs:
return AArch64::BICSXrs;
case AArch64::SUBXri:
return AArch64::SUBSXri;
case AArch64::SUBXrr:
return AArch64::SUBSXrr;
case AArch64::SUBXrs:
return AArch64::SUBSXrs;
case AArch64::SUBXrx:
return AArch64::SUBSXrx;
// SVE instructions:
case AArch64::AND_PPzPP:
return AArch64::ANDS_PPzPP;
case AArch64::BIC_PPzPP:
return AArch64::BICS_PPzPP;
case AArch64::EOR_PPzPP:
return AArch64::EORS_PPzPP;
case AArch64::NAND_PPzPP:
return AArch64::NANDS_PPzPP;
case AArch64::NOR_PPzPP:
return AArch64::NORS_PPzPP;
case AArch64::ORN_PPzPP:
return AArch64::ORNS_PPzPP;
case AArch64::ORR_PPzPP:
return AArch64::ORRS_PPzPP;
case AArch64::BRKA_PPzP:
return AArch64::BRKAS_PPzP;
case AArch64::BRKPA_PPzPP:
return AArch64::BRKPAS_PPzPP;
case AArch64::BRKB_PPzP:
return AArch64::BRKBS_PPzP;
case AArch64::BRKPB_PPzPP:
return AArch64::BRKPBS_PPzPP;
case AArch64::BRKN_PPzP:
return AArch64::BRKNS_PPzP;
case AArch64::RDFFR_PPz:
return AArch64::RDFFRS_PPz;
case AArch64::PTRUE_B:
return AArch64::PTRUES_B;
}
}
// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
bool IsPreLdSt = isPreLdSt(MI);
// If this is a volatile load/store, don't mess with it.
if (MI.hasOrderedMemoryRef())
return false;
// Make sure this is a reg/fi+imm (as opposed to an address reloc).
// For Pre-inc LD/ST, the operand is shifted by one.
assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
"Expected a reg or frame index operand.");
// For pre-indexed instructions, the third operand is the immediate value.
bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
return false;
// Can't merge/pair if the instruction modifies the base register.
// e.g., ldr x0, [x0]
// This case will never occur with an FI base.
// However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
// STR<S,D,Q,W,X>pre, it can be merged.
// For example:
// ldr q0, [x11, #32]!
// ldr q1, [x11, #16]
// to
// ldp q0, q1, [x11, #32]!
if (MI.getOperand(1).isReg() && !IsPreLdSt) {
Register BaseReg = MI.getOperand(1).getReg();
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (MI.modifiesRegister(BaseReg, TRI))
return false;
}
// Check if this load/store has a hint to avoid pair formation.
// MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
if (isLdStPairSuppressed(MI))
return false;
// Do not pair any callee-save store/reload instructions in the
// prologue/epilogue if the CFI information encoded the operations as separate
// instructions, as that will cause the size of the actual prologue to mismatch
// with the prologue size recorded in the Windows CFI.
const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
bool NeedsWinCFI = MAI->usesWindowsCFI() &&
MI.getMF()->getFunction().needsUnwindTableEntry();
if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
MI.getFlag(MachineInstr::FrameDestroy)))
return false;
// On some CPUs quad load/store pairs are slower than two single load/stores.
if (Subtarget.isPaired128Slow()) {
switch (MI.getOpcode()) {
default:
break;
case AArch64::LDURQi:
case AArch64::STURQi:
case AArch64::LDRQui:
case AArch64::STRQui:
return false;
}
}
return true;
}
bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
const MachineOperand *BaseOp;
TypeSize WidthN(0, false);
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
WidthN, TRI))
return false;
// The maximum vscale is 16 under AArch64; return the maximal extent for
// the vector.
Width = LocationSize::precise(WidthN);
BaseOps.push_back(BaseOp);
return true;
}
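// For example (illustrative), for 'ldr x0, [x1, #16]' the address mode
// returned below is BaseReg = x1, Displacement = 16, ScaledReg = 0,
// Scale = 0.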
std::optional<ExtAddrMode>
AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
const TargetRegisterInfo *TRI) const {
const MachineOperand *Base; // Filled with the base operand of MI.
int64_t Offset; // Filled with the offset of MI.
bool OffsetIsScalable;
if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
return std::nullopt;
if (!Base->isReg())
return std::nullopt;
ExtAddrMode AM;
AM.BaseReg = Base->getReg();
AM.Displacement = Offset;
AM.ScaledReg = 0;
AM.Scale = 0;
return AM;
}
bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
Register Reg,
const MachineInstr &AddrI,
ExtAddrMode &AM) const {
// Filter out instructions into which we cannot fold.
unsigned NumBytes;
int64_t OffsetScale = 1;
switch (MemI.getOpcode()) {
default:
return false;
case AArch64::LDURQi:
case AArch64::STURQi:
NumBytes = 16;
break;
case AArch64::LDURDi:
case AArch64::STURDi:
case AArch64::LDURXi:
case AArch64::STURXi:
NumBytes = 8;
break;
case AArch64::LDURWi:
case AArch64::LDURSWi:
case AArch64::STURWi:
NumBytes = 4;
break;
case AArch64::LDURHi:
case AArch64::STURHi:
case AArch64::LDURHHi:
case AArch64::STURHHi:
case AArch64::LDURSHXi:
case AArch64::LDURSHWi:
NumBytes = 2;
break;
case AArch64::LDRBroX:
case AArch64::LDRBBroX:
case AArch64::LDRSBXroX:
case AArch64::LDRSBWroX:
case AArch64::STRBroX:
case AArch64::STRBBroX:
case AArch64::LDURBi:
case AArch64::LDURBBi:
case AArch64::LDURSBXi:
case AArch64::LDURSBWi:
case AArch64::STURBi:
case AArch64::STURBBi:
case AArch64::LDRBui:
case AArch64::LDRBBui:
case AArch64::LDRSBXui:
case AArch64::LDRSBWui:
case AArch64::STRBui:
case AArch64::STRBBui:
NumBytes = 1;
break;
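// The remaining opcodes scale their offset by the access size (either a
// scaled unsigned immediate or a shifted register offset), so record the
// scale as well.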
case AArch64::LDRQroX:
case AArch64::STRQroX:
case AArch64::LDRQui:
case AArch64::STRQui:
NumBytes = 16;
OffsetScale = 16;
break;
case AArch64::LDRDroX:
case AArch64::STRDroX:
case AArch64::LDRXroX:
case AArch64::STRXroX:
case AArch64::LDRDui:
case AArch64::STRDui:
case AArch64::LDRXui:
case AArch64::STRXui:
NumBytes = 8;
OffsetScale = 8;
break;
case AArch64::LDRWroX:
case AArch64::LDRSWroX:
case AArch64::STRWroX:
case AArch64::LDRWui:
case AArch64::LDRSWui:
case AArch64::STRWui:
NumBytes = 4;
OffsetScale = 4;
break;
case AArch64::LDRHroX:
case AArch64::STRHroX:
case AArch64::LDRHHroX:
case AArch64::STRHHroX:
case AArch64::LDRSHXroX:
case AArch64::LDRSHWroX:
case AArch64::LDRHui:
case AArch64::STRHui:
case AArch64::LDRHHui:
case AArch64::STRHHui:
case AArch64::LDRSHXui:
case AArch64::LDRSHWui:
NumBytes = 2;
OffsetScale = 2;
break;
}
// Check the fold operand is not the loaded/stored value.
const MachineOperand &BaseRegOp = MemI.getOperand(0);
if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
return false;
// Handle memory instructions with a [Reg, Reg] addressing mode.
if (MemI.getOperand(2).isReg()) {
// Bail if the addressing mode already includes extension of the offset
// register.
if (MemI.getOperand(3).getImm())
return false;
// Check if we actually have a scaled offset.
if (MemI.getOperand(4).getImm() == 0)
OffsetScale = 1;
// If the address instruction is folded into the base register, then the
// addressing mode must not have a scale; only then can we swap the base and
// the scaled registers.
if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
return false;
switch (AddrI.getOpcode()) {
default:
return false;
case AArch64::SBFMXri:
// sxtw Xa, Wm
// ldr Xd, [Xn, Xa, lsl #N]
// ->
// ldr Xd, [Xn, Wm, sxtw #N]
if (AddrI.getOperand(2).getImm() != 0 ||
AddrI.getOperand(3).getImm() != 31)
return false;
AM.BaseReg = MemI.getOperand(1).getReg();
if (AM.BaseReg == Reg)
AM.BaseReg = MemI.getOperand(2).getReg();
AM.ScaledReg = AddrI.getOperand(1).getReg();
AM.Scale = OffsetScale;
AM.Displacement = 0;
AM.Form = ExtAddrMode::Formula::SExtScaledReg;
return true;
case TargetOpcode::SUBREG_TO_REG: {
// mov Wa, Wm
// ldr Xd, [Xn, Xa, lsl #N]
// ->
// ldr Xd, [Xn, Wm, uxtw #N]
// Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
if (AddrI.getOperand(1).getImm() != 0 ||
AddrI.getOperand(3).getImm() != AArch64::sub_32)
return false;
const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
Register OffsetReg = AddrI.getOperand(2).getReg();
if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
return false;
const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
if (DefMI.getOpcode() != AArch64::ORRWrs ||
DefMI.getOperand(1).getReg() != AArch64::WZR ||
DefMI.getOperand(3).getImm() != 0)
return false;
AM.BaseReg = MemI.getOperand(1).getReg();
if (AM.BaseReg == Reg)
AM.BaseReg = MemI.getOperand(2).getReg();
AM.ScaledReg = DefMI.getOperand(2).getReg();
AM.Scale = OffsetScale;
AM.Displacement = 0;
AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
return true;
}
}
}
// Handle memory instructions with a [Reg, #Imm] addressing mode.
// Check we are not breaking a potential conversion to an LDP.
auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
int64_t NewOffset) -> bool {
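// LDP/STP immediates are signed 7 bits, scaled by the access size, so the
// reachable byte offsets are [-256, 252], [-512, 504] and [-1024, 1008] for
// 4-, 8- and 16-byte accesses respectively.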
int64_t MinOffset, MaxOffset;
switch (NumBytes) {
default:
return true;
case 4:
MinOffset = -256;
MaxOffset = 252;
break;
case 8:
MinOffset = -512;
MaxOffset = 504;
break;
case 16:
MinOffset = -1024;
MaxOffset = 1008;
break;
}
return OldOffset < MinOffset || OldOffset > MaxOffset ||
(NewOffset >= MinOffset && NewOffset <= MaxOffset);
};
auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
int64_t NewOffset = OldOffset + Disp;
if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
return false;
// If the old offset would fit into an LDP, but the new offset wouldn't,
// bail out.
if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
return false;
AM.BaseReg = AddrI.getOperand(1).getReg();
AM.ScaledReg = 0;
AM.Scale = 0;
AM.Displacement = NewOffset;
AM.Form = ExtAddrMode::Formula::Basic;
return true;
};
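// Fold an add of a (possibly extended or shifted) register into a
// [Reg, Reg, {lsl,sxtw,uxtw} #N] addressing mode; this requires the memory
// instruction to have no immediate offset of its own.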
auto canFoldAddRegIntoAddrMode =
[&](int64_t Scale,
ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
if (MemI.getOperand(2).getImm() != 0)
return false;
if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
return false;
AM.BaseReg = AddrI.getOperand(1).getReg();
AM.ScaledReg = AddrI.getOperand(2).getReg();
AM.Scale = Scale;
AM.Displacement = 0;
AM.Form = Form;
return true;
};
auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
unsigned Opcode = MemI.getOpcode();
return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
Subtarget.isSTRQroSlow();
};
int64_t Disp = 0;
const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
switch (AddrI.getOpcode()) {
default:
return false;
case AArch64::ADDXri:
// add Xa, Xn, #N
// ldr Xd, [Xa, #M]
// ->
// ldr Xd, [Xn, #N'+M]
Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
return canFoldAddSubImmIntoAddrMode(Disp);
case AArch64::SUBXri:
// sub Xa, Xn, #N
// ldr Xd, [Xa, #M]
// ->
// ldr Xd, [Xn, #N'+M]
Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
return canFoldAddSubImmIntoAddrMode(-Disp);
case AArch64::ADDXrs: {
// add Xa, Xn, Xm, lsl #N
// ldr Xd, [Xa]
// ->
// ldr Xd, [Xn, Xm, lsl #N]
// Don't fold the add if the result would be slower, unless optimising for
// size.
unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
return false;
Shift = AArch64_AM::getShiftValue(Shift);
if (!OptSize) {
if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
return false;
if (avoidSlowSTRQ(MemI))
return false;
}
return canFoldAddRegIntoAddrMode(1ULL << Shift);
}
case AArch64::ADDXrr:
// add Xa, Xn, Xm
// ldr Xd, [Xa]
// ->
// ldr Xd, [Xn, Xm, lsl #0]
// Don't fold the add if the result would be slower, unless optimising for
// size.
if (!OptSize && avoidSlowSTRQ(MemI))
return false;
return canFoldAddRegIntoAddrMode(1);
case AArch64::ADDXrx:
// add Xa, Xn, Wm, {s,u}xtw #N
// ldr Xd, [Xa]
// ->
// ldr Xd, [Xn, Wm, {s,u}xtw #N]
// Don't fold the add if the result would be slower, unless optimising for
// size.
if (!OptSize && avoidSlowSTRQ(MemI))
return false;
// Can fold only sign-/zero-extend of a word.
unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
return false;
return canFoldAddRegIntoAddrMode(
1ULL << AArch64_AM::getArithShiftValue(Imm),
(Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
: ExtAddrMode::Formula::ZExtScaledReg);
}
}
// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
// return the opcode of an instruction performing the same operation, but using
// the [Reg, Reg] addressing mode.
static unsigned regOffsetOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("Address folding not implemented for instruction");
case AArch64::LDURQi:
case AArch64::LDRQui:
return AArch64::LDRQroX;
case AArch64::STURQi:
case AArch64::STRQui:
return AArch64::STRQroX;
case AArch64::LDURDi:
case AArch64::LDRDui:
return AArch64::LDRDroX;
case AArch64::STURDi:
case AArch64::STRDui:
return AArch64::STRDroX;
case AArch64::LDURXi:
case AArch64::LDRXui:
return AArch64::LDRXroX;
case AArch64::STURXi:
case AArch64::STRXui:
return AArch64::STRXroX;
case AArch64::LDURWi:
case AArch64::LDRWui:
return AArch64::LDRWroX;
case AArch64::LDURSWi:
case AArch64::LDRSWui:
return AArch64::LDRSWroX;
case AArch64::STURWi:
case AArch64::STRWui:
return AArch64::STRWroX;
case AArch64::LDURHi:
case AArch64::LDRHui:
return AArch64::LDRHroX;
case AArch64::STURHi:
case AArch64::STRHui:
return AArch64::STRHroX;
case AArch64::LDURHHi:
case AArch64::LDRHHui:
return AArch64::LDRHHroX;
case AArch64::STURHHi:
case AArch64::STRHHui:
return AArch64::STRHHroX;
case AArch64::LDURSHXi:
case AArch64::LDRSHXui:
return AArch64::LDRSHXroX;
case AArch64::LDURSHWi:
case AArch64::LDRSHWui:
return AArch64::LDRSHWroX;
case AArch64::LDURBi:
case AArch64::LDRBui:
return AArch64::LDRBroX;
case AArch64::LDURBBi:
case AArch64::LDRBBui:
return AArch64::LDRBBroX;
case AArch64::LDURSBXi:
case AArch64::LDRSBXui:
return AArch64::LDRSBXroX;
case AArch64::LDURSBWi:
case AArch64::LDRSBWui:
return AArch64::LDRSBWroX;
case AArch64::STURBi:
case AArch64::STRBui:
return AArch64::STRBroX;
case AArch64::STURBBi:
case AArch64::STRBBui:
return AArch64::STRBBroX;
}
}
// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
// the opcode of an instruction performing the same operation, but using the
// [Reg, #Imm] addressing mode with scaled offset.
static unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
switch (Opcode) {
default:
llvm_unreachable("Address folding not implemented for instruction");
case AArch64::LDURQi:
Scale = 16;
return AArch64::LDRQui;
case AArch64::STURQi:
Scale = 16;
return AArch64::STRQui;
case AArch64::LDURDi:
Scale = 8;
return AArch64::LDRDui;
case AArch64::STURDi:
Scale = 8;
return AArch64::STRDui;
case AArch64::LDURXi:
Scale = 8;
return AArch64::LDRXui;
case AArch64::STURXi:
Scale = 8;
return AArch64::STRXui;
case AArch64::LDURWi:
Scale = 4;
return AArch64::LDRWui;
case AArch64::LDURSWi:
Scale = 4;
return AArch64::LDRSWui;
case AArch64::STURWi:
Scale = 4;
return AArch64::STRWui;
case AArch64::LDURHi:
Scale = 2;
return AArch64::LDRHui;
case AArch64::STURHi:
Scale = 2;
return AArch64::STRHui;
case AArch64::LDURHHi:
Scale = 2;
return AArch64::LDRHHui;
case AArch64::STURHHi:
Scale = 2;
return AArch64::STRHHui;
case AArch64::LDURSHXi:
Scale = 2;
return AArch64::LDRSHXui;
case AArch64::LDURSHWi:
Scale = 2;
return AArch64::LDRSHWui;
case AArch64::LDURBi:
Scale = 1;
return AArch64::LDRBui;
case AArch64::LDURBBi:
Scale = 1;
return AArch64::LDRBBui;
case AArch64::LDURSBXi:
Scale = 1;
return AArch64::LDRSBXui;
case AArch64::LDURSBWi:
Scale = 1;
return AArch64::LDRSBWui;
case AArch64::STURBi:
Scale = 1;
return AArch64::STRBui;
case AArch64::STURBBi:
Scale = 1;
return AArch64::STRBBui;
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = 16;
return Opcode;
case AArch64::LDRDui:
case AArch64::STRDui:
case AArch64::LDRXui:
case AArch64::STRXui:
Scale = 8;
return Opcode;
case AArch64::LDRWui:
case AArch64::LDRSWui:
case AArch64::STRWui:
Scale = 4;
return Opcode;
case AArch64::LDRHui:
case AArch64::STRHui:
case AArch64::LDRHHui:
case AArch64::STRHHui:
case AArch64::LDRSHXui:
case AArch64::LDRSHWui:
Scale = 2;
return Opcode;
case AArch64::LDRBui:
case AArch64::LDRBBui:
case AArch64::LDRSBXui:
case AArch64::LDRSBWui:
case AArch64::STRBui:
case AArch64::STRBBui:
Scale = 1;
return Opcode;
}
}
// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
// the opcode of an instruction performing the same operation, but using the
// [Reg, #Imm] addressing mode with unscaled offset.
static unsigned unscaledOffsetOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("Address folding not implemented for instruction");
case AArch64::LDURQi:
case AArch64::STURQi:
case AArch64::LDURDi:
case AArch64::STURDi:
case AArch64::LDURXi:
case AArch64::STURXi:
case AArch64::LDURWi:
case AArch64::LDURSWi:
case AArch64::STURWi:
case AArch64::LDURHi:
case AArch64::STURHi:
case AArch64::LDURHHi:
case AArch64::STURHHi:
case AArch64::LDURSHXi:
case AArch64::LDURSHWi:
case AArch64::LDURBi:
case AArch64::STURBi:
case AArch64::LDURBBi:
case AArch64::STURBBi:
case AArch64::LDURSBWi:
case AArch64::LDURSBXi:
return Opcode;
case AArch64::LDRQui:
return AArch64::LDURQi;
case AArch64::STRQui:
return AArch64::STURQi;
case AArch64::LDRDui:
return AArch64::LDURDi;
case AArch64::STRDui:
return AArch64::STURDi;
case AArch64::LDRXui:
return AArch64::LDURXi;
case AArch64::STRXui:
return AArch64::STURXi;
case AArch64::LDRWui:
return AArch64::LDURWi;
case AArch64::LDRSWui:
return AArch64::LDURSWi;
case AArch64::STRWui:
return AArch64::STURWi;
case AArch64::LDRHui:
return AArch64::LDURHi;
case AArch64::STRHui:
return AArch64::STURHi;
case AArch64::LDRHHui:
return AArch64::LDURHHi;
case AArch64::STRHHui:
return AArch64::STURHHi;
case AArch64::LDRSHXui:
return AArch64::LDURSHXi;
case AArch64::LDRSHWui:
return AArch64::LDURSHWi;
case AArch64::LDRBBui:
return AArch64::LDURBBi;
case AArch64::LDRBui:
return AArch64::LDURBi;
case AArch64::STRBBui:
return AArch64::STURBBi;
case AArch64::STRBui:
return AArch64::STURBi;
case AArch64::LDRSBWui:
return AArch64::LDURSBWi;
case AArch64::LDRSBXui:
return AArch64::LDURSBXi;
}
}
// Given the opcode of a memory load/store instruction, return the opcode of an
// instruction performing the same operation, but using
// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
// offset register.
static unsigned offsetExtendOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("Address folding not implemented for instruction");
case AArch64::LDRQroX:
case AArch64::LDURQi:
case AArch64::LDRQui:
return AArch64::LDRQroW;
case AArch64::STRQroX:
case AArch64::STURQi:
case AArch64::STRQui:
return AArch64::STRQroW;
case AArch64::LDRDroX:
case AArch64::LDURDi:
case AArch64::LDRDui:
return AArch64::LDRDroW;
case AArch64::STRDroX:
case AArch64::STURDi:
case AArch64::STRDui:
return AArch64::STRDroW;
case AArch64::LDRXroX:
case AArch64::LDURXi:
case AArch64::LDRXui:
return AArch64::LDRXroW;
case AArch64::STRXroX:
case AArch64::STURXi:
case AArch64::STRXui:
return AArch64::STRXroW;
case AArch64::LDRWroX:
case AArch64::LDURWi:
case AArch64::LDRWui:
return AArch64::LDRWroW;
case AArch64::LDRSWroX:
case AArch64::LDURSWi:
case AArch64::LDRSWui:
return AArch64::LDRSWroW;
case AArch64::STRWroX:
case AArch64::STURWi:
case AArch64::STRWui:
return AArch64::STRWroW;
case AArch64::LDRHroX:
case AArch64::LDURHi:
case AArch64::LDRHui:
return AArch64::LDRHroW;
case AArch64::STRHroX:
case AArch64::STURHi:
case AArch64::STRHui:
return AArch64::STRHroW;
case AArch64::LDRHHroX:
case AArch64::LDURHHi:
case AArch64::LDRHHui:
return AArch64::LDRHHroW;
case AArch64::STRHHroX:
case AArch64::STURHHi:
case AArch64::STRHHui:
return AArch64::STRHHroW;
case AArch64::LDRSHXroX:
case AArch64::LDURSHXi:
case AArch64::LDRSHXui:
return AArch64::LDRSHXroW;
case AArch64::LDRSHWroX:
case AArch64::LDURSHWi:
case AArch64::LDRSHWui:
return AArch64::LDRSHWroW;
case AArch64::LDRBroX:
case AArch64::LDURBi:
case AArch64::LDRBui:
return AArch64::LDRBroW;
case AArch64::LDRBBroX:
case AArch64::LDURBBi:
case AArch64::LDRBBui:
return AArch64::LDRBBroW;
case AArch64::LDRSBXroX:
case AArch64::LDURSBXi:
case AArch64::LDRSBXui:
return AArch64::LDRSBXroW;
case AArch64::LDRSBWroX:
case AArch64::LDURSBWi:
case AArch64::LDRSBWui:
return AArch64::LDRSBWroW;
case AArch64::STRBroX:
case AArch64::STURBi:
case AArch64::STRBui:
return AArch64::STRBroW;
case AArch64::STRBBroX:
case AArch64::STURBBi:
case AArch64::STRBBui:
return AArch64::STRBBroW;
}
}
MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
const ExtAddrMode &AM) const {
const DebugLoc &DL = MemI.getDebugLoc();
MachineBasicBlock &MBB = *MemI.getParent();
MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
if (AM.Form == ExtAddrMode::Formula::Basic) {
if (AM.ScaledReg) {
// The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
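// The two trailing immediates are the offset-extend flag (0 here, i.e.
// plain LSL) and whether the offset register is shifted by the access size.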
auto B = BuildMI(MBB, MemI, DL, get(Opcode))
.addReg(MemI.getOperand(0).getReg(),
MemI.mayLoad() ? RegState::Define : 0)
.addReg(AM.BaseReg)
.addReg(AM.ScaledReg)
.addImm(0)
.addImm(AM.Scale > 1)
.setMemRefs(MemI.memoperands())
.setMIFlags(MemI.getFlags());
return B.getInstr();
}
assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
"Addressing mode not supported for folding");
// The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
unsigned Scale = 1;
unsigned Opcode = MemI.getOpcode();
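// A displacement that fits in a signed 9-bit immediate can use the unscaled
// form directly; otherwise switch to the scaled form and divide the
// displacement by the access size.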
if (isInt<9>(AM.Displacement))
Opcode = unscaledOffsetOpcode(Opcode);
else
Opcode = scaledOffsetOpcode(Opcode, Scale);
auto B = BuildMI(MBB, MemI, DL, get(Opcode))
.addReg(MemI.getOperand(0).getReg(),
MemI.mayLoad() ? RegState::Define : 0)
.addReg(AM.BaseReg)
.addImm(AM.Displacement / Scale)
.setMemRefs(MemI.memoperands())
.setMIFlags(MemI.getFlags());
return B.getInstr();
}
if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
// The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
assert(AM.ScaledReg && !AM.Displacement &&
"Address offset can be a register or an immediate, but not both");
unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
// Make sure the offset register is in the correct register class.
Register OffsetReg = AM.ScaledReg;
const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
.addReg(AM.ScaledReg, 0, AArch64::sub_32);
}
auto B = BuildMI(MBB, MemI, DL, get(Opcode))
.addReg(MemI.getOperand(0).getReg(),
MemI.mayLoad() ? RegState::Define : 0)
.addReg(AM.BaseReg)
.addReg(OffsetReg)
.addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
.addImm(AM.Scale != 1)
.setMemRefs(MemI.memoperands())
.setMIFlags(MemI.getFlags());
return B.getInstr();
}
llvm_unreachable(
"Function must not be called with an addressing mode it can't handle");
}
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable, TypeSize &Width,
const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
if (LdSt.getNumExplicitOperands() == 3) {
// Non-paired instruction (e.g., ldr x1, [x0, #8]).
if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
!LdSt.getOperand(2).isImm())
return false;
} else if (LdSt.getNumExplicitOperands() == 4) {
// Paired instruction (e.g., ldp x1, x2, [x0, #8]).
if (!LdSt.getOperand(1).isReg() ||
(!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
!LdSt.getOperand(3).isImm())
return false;
} else
return false;
// Get the scaling factor and the access width for the instruction.
TypeSize Scale(0U, false);
int64_t Dummy1, Dummy2;
// If this returns false, then it's an instruction we don't want to handle.
if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
return false;
// Compute the offset. Offset is calculated as the immediate operand
// multiplied by the scaling factor. Unscaled instructions have scaling factor
// set to 1.
if (LdSt.getNumExplicitOperands() == 3) {
BaseOp = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
} else {
assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
BaseOp = &LdSt.getOperand(2);
Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
}
OffsetIsScalable = Scale.isScalable();
if (!BaseOp->isReg() && !BaseOp->isFI())
return false;
return true;
}
MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
return OfsOp;
}
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
TypeSize &Width, int64_t &MinOffset,
int64_t &MaxOffset) {
switch (Opcode) {
// Not a memory operation, or not something we want to handle.
default:
Scale = TypeSize::getFixed(0);
Width = TypeSize::getFixed(0);
MinOffset = MaxOffset = 0;
return false;
// LDR / STR
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = TypeSize::getFixed(16);
Width = TypeSize::getFixed(16);
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::LDRXui:
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
case AArch64::PRFMui:
Scale = TypeSize::getFixed(8);
Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::LDRWui:
case AArch64::LDRSui:
case AArch64::LDRSWui:
case AArch64::STRWui:
case AArch64::STRSui:
Scale = TypeSize::getFixed(4);
Width = TypeSize::getFixed(4);
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::LDRHui:
case AArch64::LDRHHui:
case AArch64::LDRSHWui:
case AArch64::LDRSHXui:
case AArch64::STRHui:
case AArch64::STRHHui:
Scale = TypeSize::getFixed(2);
Width = TypeSize::getFixed(2);
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::LDRBui:
case AArch64::LDRBBui:
case AArch64::LDRSBWui:
case AArch64::LDRSBXui:
case AArch64::STRBui:
case AArch64::STRBBui:
Scale = TypeSize::getFixed(1);
Width = TypeSize::getFixed(1);
MinOffset = 0;
MaxOffset = 4095;
break;
// post/pre inc
case AArch64::STRQpre:
case AArch64::LDRQpost:
Scale = TypeSize::getFixed(1);
Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STRXpre:
case AArch64::STRDpre:
case AArch64::LDRXpost:
case AArch64::LDRDpost:
Scale = TypeSize::getFixed(1);
Width = TypeSize::getFixed(8);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STRWpost:
case AArch64::LDRWpost:
Scale = TypeSize::getFixed(4);
Width = TypeSize::getFixed(32);
MinOffset = -256;
MaxOffset = 255;
break;
// Unscaled
case AArch64::LDURQi:
case AArch64::STURQi:
Scale = TypeSize::getFixed(1);
Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURXi:
case AArch64::LDURDi:
case AArch64::LDAPURXi:
case AArch64::STURXi:
case AArch64::STURDi:
case AArch64::STLURXi:
case AArch64::PRFUMi:
Scale = TypeSize::getFixed(1);
Width = TypeSize::getFixed(8);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURWi:
case AArch64::LDURSi:
case AArch64::LDURSWi:
case AArch64::LDAPURi:
case AArch64::LDAPURSWi:
case AArch64::STURWi:
case AArch64::STURSi:
case AArch64::STLURWi:
Scale = TypeSize::getFixed(1);
Width = TypeSize::getFixed(4);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURHi:
case AArch64::LDURHHi:
case AArch64::LDURSHXi:
case AArch64::LDURSHWi:
case AArch64::LDAPURHi:
case AArch64::LDAPURSHWi:
case AArch64::LDAPURSHXi:
case AArch64::STURHi:
case AArch64::STURHHi:
case AArch64::STLURHi:
Scale = TypeSize::getFixed(1);
Width = TypeSize::getFixed(2);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURBi:
case AArch64::LDURBBi:
case AArch64::LDURSBXi:
case AArch64::LDURSBWi:
case AArch64::LDAPURBi:
case AArch64::LDAPURSBWi:
case AArch64::LDAPURSBXi:
case AArch64::STURBi:
case AArch64::STURBBi:
case AArch64::STLURBi:
Scale = TypeSize::getFixed(1);
Width = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
// LDP / STP
case AArch64::LDPQi:
case AArch64::LDNPQi:
case AArch64::STPQi:
case AArch64::STNPQi:
Scale = TypeSize::getFixed(16);
Width = TypeSize::getFixed(32);
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LDPXi:
case AArch64::LDPDi:
case AArch64::LDNPXi:
case AArch64::LDNPDi:
case AArch64::STPXi:
case AArch64::STPDi:
case AArch64::STNPXi:
case AArch64::STNPDi:
Scale = TypeSize::getFixed(8);
Width = TypeSize::getFixed(16);
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LDPWi:
case AArch64::LDPSi:
case AArch64::LDNPWi:
case AArch64::LDNPSi:
case AArch64::STPWi:
case AArch64::STPSi:
case AArch64::STNPWi:
case AArch64::STNPSi:
Scale = TypeSize::getFixed(4);
Width = TypeSize::getFixed(8);
MinOffset = -64;
MaxOffset = 63;
break;
// pre/post inc
case AArch64::STPQpre:
case AArch64::LDPQpost:
Scale = TypeSize::getFixed(16);
Width = TypeSize::getFixed(16);
MinOffset = -1024;
MaxOffset = 1008;
break;
case AArch64::STPXpre:
case AArch64::LDPXpost:
case AArch64::STPDpre:
case AArch64::LDPDpost:
Scale = TypeSize::getFixed(8);
Width = TypeSize::getFixed(8);
MinOffset = -512;
MaxOffset = 504;
break;
case AArch64::StoreSwiftAsyncContext:
// Store is an STRXui, but there might be an ADDXri in the expansion too.
Scale = TypeSize::getFixed(1);
Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 4095;
break;
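// MTE address/tag arithmetic: the immediate is an offset in 16-byte granules.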
case AArch64::ADDG:
Scale = TypeSize::getFixed(16);
Width = TypeSize::getFixed(0);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::TAGPstack:
Scale = TypeSize::getFixed(16);
Width = TypeSize::getFixed(0);
// TAGP with a negative offset turns into SUBP, which has a maximum offset
// of 63 (not 64!).
MinOffset = -63;
MaxOffset = 63;
break;
case AArch64::LDG:
case AArch64::STGi:
case AArch64::STZGi:
Scale = TypeSize::getFixed(16);
Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
// SVE
case AArch64::STR_ZZZZXI:
case AArch64::LDR_ZZZZXI:
Scale = TypeSize::getScalable(16);
Width = TypeSize::getScalable(16 * 4);
MinOffset = -256;
MaxOffset = 252;
break;
case AArch64::STR_ZZZXI:
case AArch64::LDR_ZZZXI:
Scale = TypeSize::getScalable(16);
Width = TypeSize::getScalable(16 * 3);
MinOffset = -256;
MaxOffset = 253;
break;
case AArch64::STR_ZZXI:
case AArch64::LDR_ZZXI:
Scale = TypeSize::getScalable(16);
Width = TypeSize::getScalable(16 * 2);
MinOffset = -256;
MaxOffset = 254;
break;
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
Scale = TypeSize::getScalable(2);
Width = TypeSize::getScalable(2);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDR_PPXI:
case AArch64::STR_PPXI:
Scale = TypeSize::getScalable(2);
Width = TypeSize::getScalable(2 * 2);
MinOffset = -256;
MaxOffset = 254;
break;
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
Scale = TypeSize::getScalable(16);
Width = TypeSize::getScalable(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LD1B_IMM:
case AArch64::LD1H_IMM:
case AArch64::LD1W_IMM:
case AArch64::LD1D_IMM:
case AArch64::LDNT1B_ZRI:
case AArch64::LDNT1H_ZRI:
case AArch64::LDNT1W_ZRI:
case AArch64::LDNT1D_ZRI:
case AArch64::ST1B_IMM:
case AArch64::ST1H_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1D_IMM:
case AArch64::STNT1B_ZRI:
case AArch64::STNT1H_ZRI:
case AArch64::STNT1W_ZRI:
case AArch64::STNT1D_ZRI:
case AArch64::LDNF1B_IMM:
case AArch64::LDNF1H_IMM:
case AArch64::LDNF1W_IMM:
case AArch64::LDNF1D_IMM:
// A full vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(16);
Width = TypeSize::getScalable(16);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD2B_IMM:
case AArch64::LD2H_IMM:
case AArch64::LD2W_IMM:
case AArch64::LD2D_IMM:
case AArch64::ST2B_IMM:
case AArch64::ST2H_IMM:
case AArch64::ST2W_IMM:
case AArch64::ST2D_IMM:
Scale = TypeSize::getScalable(32);
Width = TypeSize::getScalable(16 * 2);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD3B_IMM:
case AArch64::LD3H_IMM:
case AArch64::LD3W_IMM:
case AArch64::LD3D_IMM:
case AArch64::ST3B_IMM:
case AArch64::ST3H_IMM:
case AArch64::ST3W_IMM:
case AArch64::ST3D_IMM:
Scale = TypeSize::getScalable(48);
Width = TypeSize::getScalable(16 * 3);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD4B_IMM:
case AArch64::LD4H_IMM:
case AArch64::LD4W_IMM:
case AArch64::LD4D_IMM:
case AArch64::ST4B_IMM:
case AArch64::ST4H_IMM:
case AArch64::ST4W_IMM:
case AArch64::ST4D_IMM:
Scale = TypeSize::getScalable(64);
Width = TypeSize::getScalable(16 * 4);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD1B_H_IMM:
case AArch64::LD1SB_H_IMM:
case AArch64::LD1H_S_IMM:
case AArch64::LD1SH_S_IMM:
case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM:
case AArch64::ST1B_H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1W_D_IMM:
case AArch64::LDNF1B_H_IMM:
case AArch64::LDNF1SB_H_IMM:
case AArch64::LDNF1H_S_IMM:
case AArch64::LDNF1SH_S_IMM:
case AArch64::LDNF1W_D_IMM:
case AArch64::LDNF1SW_D_IMM:
// A half vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(8);
Width = TypeSize::getScalable(8);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD1B_S_IMM:
case AArch64::LD1SB_S_IMM:
case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM:
case AArch64::ST1H_D_IMM:
case AArch64::LDNF1B_S_IMM:
case AArch64::LDNF1SB_S_IMM:
case AArch64::LDNF1H_D_IMM:
case AArch64::LDNF1SH_D_IMM:
// A quarter vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(4);
Width = TypeSize::getScalable(4);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM:
case AArch64::LDNF1B_D_IMM:
case AArch64::LDNF1SB_D_IMM:
// An eighth vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(2);
Width = TypeSize::getScalable(2);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::ST2Gi:
case AArch64::STZ2Gi:
Scale = TypeSize::getFixed(16);
Width = TypeSize::getFixed(32);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STGPi:
Scale = TypeSize::getFixed(16);
Width = TypeSize::getFixed(16);
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LD1RB_IMM:
case AArch64::LD1RB_H_IMM:
case AArch64::LD1RB_S_IMM:
case AArch64::LD1RB_D_IMM:
case AArch64::LD1RSB_H_IMM:
case AArch64::LD1RSB_S_IMM:
case AArch64::LD1RSB_D_IMM:
Scale = TypeSize::getFixed(1);
Width = TypeSize::getFixed(1);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RH_IMM:
case AArch64::LD1RH_S_IMM:
case AArch64::LD1RH_D_IMM:
case AArch64::LD1RSH_S_IMM:
case AArch64::LD1RSH_D_IMM:
Scale = TypeSize::getFixed(2);
Width = TypeSize::getFixed(2);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RW_IMM:
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
Scale = TypeSize::getFixed(4);
Width = TypeSize::getFixed(4);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RD_IMM:
Scale = TypeSize::getFixed(8);
Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 63;
break;
}
return true;
}
// Scaling factor for unscaled load or store.
int AArch64InstrInfo::getMemScale(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has unknown scale!");
case AArch64::LDRBBui:
case AArch64::LDURBBi:
case AArch64::LDRSBWui:
case AArch64::LDURSBWi:
case AArch64::STRBBui:
case AArch64::STURBBi:
return 1;
case AArch64::LDRHHui:
case AArch64::LDURHHi:
case AArch64::LDRSHWui:
case AArch64::LDURSHWi:
case AArch64::STRHHui:
case AArch64::STURHHi:
return 2;
case AArch64::LDRSui:
case AArch64::LDURSi:
case AArch64::LDRSpre:
case AArch64::LDRSWui:
case AArch64::LDURSWi:
case AArch64::LDRSWpre:
case AArch64::LDRWpre:
case AArch64::LDRWui:
case AArch64::LDURWi:
case AArch64::STRSui:
case AArch64::STURSi:
case AArch64::STRSpre:
case AArch64::STRWui:
case AArch64::STURWi:
case AArch64::STRWpre:
case AArch64::LDPSi:
case AArch64::LDPSWi:
case AArch64::LDPWi:
case AArch64::STPSi:
case AArch64::STPWi:
return 4;
case AArch64::LDRDui:
case AArch64::LDURDi:
case AArch64::LDRDpre:
case AArch64::LDRXui:
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::STRDui:
case AArch64::STURDi:
case AArch64::STRDpre:
case AArch64::STRXui:
case AArch64::STURXi:
case AArch64::STRXpre:
case AArch64::LDPDi:
case AArch64::LDPXi:
case AArch64::STPDi:
case AArch64::STPXi:
return 8;
case AArch64::LDRQui:
case AArch64::LDURQi:
case AArch64::STRQui:
case AArch64::STURQi:
case AArch64::STRQpre:
case AArch64::LDPQi:
case AArch64::LDRQpre:
case AArch64::STPQi:
case AArch64::STGi:
case AArch64::STZGi:
case AArch64::ST2Gi:
case AArch64::STZ2Gi:
case AArch64::STGPi:
return 16;
}
}
bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDRWpre:
case AArch64::LDRXpre:
case AArch64::LDRSWpre:
case AArch64::LDRSpre:
case AArch64::LDRDpre:
case AArch64::LDRQpre:
return true;
}
}
bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::STRWpre:
case AArch64::STRXpre:
case AArch64::STRSpre:
case AArch64::STRDpre:
case AArch64::STRQpre:
return true;
}
}
bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
return isPreLd(MI) || isPreSt(MI);
}
bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDPSi:
case AArch64::LDPSWi:
case AArch64::LDPDi:
case AArch64::LDPQi:
case AArch64::LDPWi:
case AArch64::LDPXi:
case AArch64::STPSi:
case AArch64::STPDi:
case AArch64::STPQi:
case AArch64::STPWi:
case AArch64::STPXi:
case AArch64::STGPi:
return true;
}
}
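// Return the base register operand: operand 2 for paired and pre-indexed
// load/stores, operand 1 otherwise.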
const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
unsigned Idx =
AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
: 1;
return MI.getOperand(Idx);
}
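// Return the immediate offset operand: operand 3 for paired and pre-indexed
// load/stores, operand 2 otherwise.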
const MachineOperand &
AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
unsigned Idx =
AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
: 2;
return MI.getOperand(Idx);
}
static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
Register Reg) {
if (MI.getParent() == nullptr)
return nullptr;
const MachineFunction *MF = MI.getParent()->getParent();
return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
}
bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
auto IsHFPR = [&](const MachineOperand &Op) {
if (!Op.isReg())
return false;
auto Reg = Op.getReg();
if (Reg.isPhysical())
return AArch64::FPR16RegClass.contains(Reg);
const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
return TRC == &AArch64::FPR16RegClass ||
TRC == &AArch64::FPR16_loRegClass;
};
return llvm::any_of(MI.operands(), IsHFPR);
}
bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
auto IsQFPR = [&](const MachineOperand &Op) {
if (!Op.isReg())
return false;
auto Reg = Op.getReg();
if (Reg.isPhysical())
return AArch64::FPR128RegClass.contains(Reg);
const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
return TRC == &AArch64::FPR128RegClass ||
TRC == &AArch64::FPR128_loRegClass;
};
return llvm::any_of(MI.operands(), IsQFPR);
}
bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AArch64::BRK:
case AArch64::HLT:
case AArch64::PACIASP:
case AArch64::PACIBSP:
// Implicit BTI behavior.
return true;
case AArch64::PAUTH_PROLOGUE:
// PAUTH_PROLOGUE expands to PACI(A|B)SP.
return true;
case AArch64::HINT: {
unsigned Imm = MI.getOperand(0).getImm();
// Explicit BTI instructions (BTI, BTI c, BTI j, BTI jc).
if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
return true;
// PACI(A|B)SP instructions.
if (Imm == 25 || Imm == 27)
return true;
return false;
}
default:
return false;
}
}
bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
if (Reg == 0)
return false;
assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
return AArch64::FPR128RegClass.contains(Reg) ||
AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR32RegClass.contains(Reg) ||
AArch64::FPR16RegClass.contains(Reg) ||
AArch64::FPR8RegClass.contains(Reg);
}
bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
auto IsFPR = [&](const MachineOperand &Op) {
if (!Op.isReg())
return false;
auto Reg = Op.getReg();
if (Reg.isPhysical())
return isFpOrNEON(Reg);
const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
return TRC == &AArch64::FPR128RegClass ||
TRC == &AArch64::FPR128_loRegClass ||
TRC == &AArch64::FPR64RegClass ||
TRC == &AArch64::FPR64_loRegClass ||
TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
TRC == &AArch64::FPR8RegClass;
};
return llvm::any_of(MI.operands(), IsFPR);
}
// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
int Scale = AArch64InstrInfo::getMemScale(Opc);
// If the byte-offset isn't a multiple of the stride, we can't scale this
// offset.
if (Offset % Scale != 0)
return false;
// Convert the byte-offset used by unscaled into an "element" offset used
// by the scaled pair load/store instructions.
Offset /= Scale;
return true;
}
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
if (FirstOpc == SecondOpc)
return true;
// We can also pair sign-ext and zero-ext instructions.
switch (FirstOpc) {
default:
return false;
case AArch64::STRSui:
case AArch64::STURSi:
return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
case AArch64::STRDui:
case AArch64::STURDi:
return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
case AArch64::STRQui:
case AArch64::STURQi:
return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
case AArch64::STRWui:
case AArch64::STURWi:
return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
case AArch64::STRXui:
case AArch64::STURXi:
return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
case AArch64::LDRSui:
case AArch64::LDURSi:
return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
case AArch64::LDRDui:
case AArch64::LDURDi:
return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
case AArch64::LDRQui:
case AArch64::LDURQi:
return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
case AArch64::LDRWui:
case AArch64::LDURWi:
return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
case AArch64::LDRSWui:
case AArch64::LDURSWi:
return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
case AArch64::LDRXui:
case AArch64::LDURXi:
return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
}
// These instructions can't be paired based on their opcodes.
return false;
}
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
int64_t Offset1, unsigned Opcode1, int FI2,
int64_t Offset2, unsigned Opcode2) {
// Accesses through fixed stack object frame indices may access a different
// fixed stack slot. Check that the object offsets + offsets match.
if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
// Convert to scaled object offsets.
int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
if (ObjectOffset1 % Scale1 != 0)
return false;
ObjectOffset1 /= Scale1;
int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
if (ObjectOffset2 % Scale2 != 0)
return false;
ObjectOffset2 /= Scale2;
ObjectOffset1 += Offset1;
ObjectOffset2 += Offset2;
return ObjectOffset1 + 1 == ObjectOffset2;
}
return FI1 == FI2;
}
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOperandWithOffset returns true.
bool AArch64InstrInfo::shouldClusterMemOps(
ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
unsigned NumBytes) const {
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
const MachineOperand &BaseOp1 = *BaseOps1.front();
const MachineOperand &BaseOp2 = *BaseOps2.front();
const MachineInstr &FirstLdSt = *BaseOp1.getParent();
const MachineInstr &SecondLdSt = *BaseOp2.getParent();
if (BaseOp1.getType() != BaseOp2.getType())
return false;
assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
"Only base registers and frame indices are supported.");
// Check for both base regs and base FI.
if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
return false;
// Only cluster up to a single pair.
if (ClusterSize > 2)
return false;
if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
return false;
// Can we pair these instructions based on their opcodes?
unsigned FirstOpc = FirstLdSt.getOpcode();
unsigned SecondOpc = SecondLdSt.getOpcode();
if (!canPairLdStOpc(FirstOpc, SecondOpc))
return false;
// Can't merge volatiles or load/stores that have a hint to avoid pair
// formation, for example.
if (!isCandidateToMergeOrPair(FirstLdSt) ||
!isCandidateToMergeOrPair(SecondLdSt))
return false;
// isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
return false;
int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
return false;
// Pairwise instructions have a 7-bit signed offset field.
if (Offset1 > 63 || Offset1 < -64)
return false;
// The caller should already have ordered First/SecondLdSt by offset.
// Note: this does not hold for non-equal frame index bases.
if (BaseOp1.isFI()) {
assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
"Caller should have ordered offsets.");
const MachineFrameInfo &MFI =
FirstLdSt.getParent()->getParent()->getFrameInfo();
return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
BaseOp2.getIndex(), Offset2, SecondOpc);
}
assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
return Offset1 + 1 == Offset2;
}
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
unsigned Reg, unsigned SubIdx,
unsigned State,
const TargetRegisterInfo *TRI) {
if (!SubIdx)
return MIB.addReg(Reg, State);
if (Register::isPhysicalRegister(Reg))
return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
return MIB.addReg(Reg, State, SubIdx);
}
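// Returns true if copying a register tuple in ascending sub-register order
// would overwrite part of the source tuple before it has been read, i.e. the
// start of the destination range lies within the source range.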
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
unsigned NumRegs) {
// We really want the positive remainder mod 32 here, which happens to be
// easily obtainable with a mask.
return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc,
unsigned Opcode,
ArrayRef<unsigned> Indices) const {
assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned NumRegs = Indices.size();
int SubReg = 0, End = NumRegs, Incr = 1;
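// Copy the sub-registers in reverse order if a forward copy would clobber
// part of the source tuple before it is read.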
if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
SubReg = NumRegs - 1;
End = -1;
Incr = -1;
}
for (; SubReg != End; SubReg += Incr) {
const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
}
}
void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
DebugLoc DL, unsigned DestReg,
unsigned SrcReg, bool KillSrc,
unsigned Opcode, unsigned ZeroReg,
llvm::ArrayRef<unsigned> Indices) const {
const TargetRegisterInfo *TRI = &getRegisterInfo();
unsigned NumRegs = Indices.size();
#ifndef NDEBUG
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
"GPR reg sequences should not be able to overlap");
#endif
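// Expand to one "ORR{X,W}rs Dest[i], ZeroReg, Src[i], lsl #0" move per
// sub-register.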
for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
MIB.addReg(ZeroReg);
AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
MIB.addImm(0);
}
}
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
if (AArch64::GPR32spRegClass.contains(DestReg) &&
(AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
// If either operand is WSP, expand to ADD #0.
if (Subtarget.hasZeroCycleRegMove()) {
// Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
MCRegister DestRegX = TRI->getMatchingSuperReg(
DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
MCRegister SrcRegX = TRI->getMatchingSuperReg(
SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
// This instruction is reading and writing X registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegX, but a proper
// value from SrcReg.
BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
.addReg(SrcRegX, RegState::Undef)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
} else {
BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
}
} else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else {
if (Subtarget.hasZeroCycleRegMove()) {
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
MCRegister DestRegX = TRI->getMatchingSuperReg(
DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
MCRegister SrcRegX = TRI->getMatchingSuperReg(
SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
// This instruction is reading and writing X registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegX, but a proper
// value from SrcReg.
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
.addReg(AArch64::XZR)
.addReg(SrcRegX, RegState::Undef)
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
} else {
// Otherwise, expand to ORR WZR.
BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
.addReg(AArch64::WZR)
.addReg(SrcReg, getKillRegState(KillSrc));
}
}
return;
}
// Copy a Predicate register by ORRing with itself.
if (AArch64::PPRRegClass.contains(DestReg) &&
AArch64::PPRRegClass.contains(SrcReg)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected SVE register.");
BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
.addReg(SrcReg) // Pg
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
// Copy a predicate-as-counter register by ORRing with itself as if it
// were a regular predicate (mask) register.
bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
if (DestIsPNR || SrcIsPNR) {
auto ToPPR = [](MCRegister R) -> MCRegister {
return (R - AArch64::PN0) + AArch64::P0;
};
MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
if (PPRSrcReg != PPRDestReg) {
auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
.addReg(PPRSrcReg) // Pg
.addReg(PPRSrcReg)
.addReg(PPRSrcReg, getKillRegState(KillSrc));
if (DestIsPNR)
NewMI.addDef(DestReg, RegState::Implicit);
}
return;
}
// Copy a Z register by ORRing with itself.
if (AArch64::ZPRRegClass.contains(DestReg) &&
AArch64::ZPRRegClass.contains(SrcReg)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected SVE register.");
BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
// Copy a Z register pair by copying the individual sub-registers.
if ((AArch64::ZPR2RegClass.contains(DestReg) ||
AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
(AArch64::ZPR2RegClass.contains(SrcReg) ||
AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
return;
}
// Copy a Z register triple by copying the individual sub-registers.
if (AArch64::ZPR3RegClass.contains(DestReg) &&
AArch64::ZPR3RegClass.contains(SrcReg)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
return;
}
// Copy a Z register quad by copying the individual sub-registers.
if ((AArch64::ZPR4RegClass.contains(DestReg) ||
AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
(AArch64::ZPR4RegClass.contains(SrcReg) ||
AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2, AArch64::zsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
return;
}
if (AArch64::GPR64spRegClass.contains(DestReg) &&
(AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
// If either operand is SP, expand to ADD #0.
BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else {
// Otherwise, expand to ORR XZR.
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
.addReg(AArch64::XZR)
.addReg(SrcReg, getKillRegState(KillSrc));
}
return;
}
// Copy a DDDD register quad by copying the individual sub-registers.
if (AArch64::DDDDRegClass.contains(DestReg) &&
AArch64::DDDDRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
AArch64::dsub2, AArch64::dsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
Indices);
return;
}
// Copy a DDD register triple by copying the individual sub-registers.
if (AArch64::DDDRegClass.contains(DestReg) &&
AArch64::DDDRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
AArch64::dsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
Indices);
return;
}
// Copy a DD register pair by copying the individual sub-registers.
if (AArch64::DDRegClass.contains(DestReg) &&
AArch64::DDRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
Indices);
return;
}
// Copy a QQQQ register quad by copying the individual sub-registers.
if (AArch64::QQQQRegClass.contains(DestReg) &&
AArch64::QQQQRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
AArch64::qsub2, AArch64::qsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
Indices);
return;
}
// Copy a QQQ register triple by copying the individual sub-registers.
if (AArch64::QQQRegClass.contains(DestReg) &&
AArch64::QQQRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
AArch64::qsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
Indices);
return;
}
// Copy a QQ register pair by copying the individual sub-registers.
if (AArch64::QQRegClass.contains(DestReg) &&
AArch64::QQRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
Indices);
return;
}
if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
AArch64::XZR, Indices);
return;
}
if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
AArch64::WZR, Indices);
return;
}
if (AArch64::FPR128RegClass.contains(DestReg) &&
AArch64::FPR128RegClass.contains(SrcReg)) {
if (Subtarget.isSVEorStreamingSVEAvailable() &&
!Subtarget.isNeonAvailable())
BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
.addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
else if (Subtarget.isNeonAvailable())
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else {
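// Neither NEON nor SVE is available: copy the Q register through the
// stack with a pre-indexed store (SP -= 16) followed by a post-indexed
// load (SP += 16).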
BuildMI(MBB, I, DL, get(AArch64::STRQpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(AArch64::SP)
.addImm(-16);
BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
.addReg(AArch64::SP, RegState::Define)
.addReg(DestReg, RegState::Define)
.addReg(AArch64::SP)
.addImm(16);
}
return;
}
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR16RegClass.contains(DestReg) &&
AArch64::FPR16RegClass.contains(SrcReg)) {
DestReg =
RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
SrcReg =
RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR8RegClass.contains(DestReg) &&
AArch64::FPR8RegClass.contains(SrcReg)) {
DestReg =
RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
SrcReg =
RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
// Copies between GPR64 and FPR64.
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::GPR64RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::GPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
// Copies between GPR32 and FPR32.
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::GPR32RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::GPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (DestReg == AArch64::NZCV) {
assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
BuildMI(MBB, I, DL, get(AArch64::MSR))
.addImm(AArch64SysReg::NZCV)
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
return;
}
if (SrcReg == AArch64::NZCV) {
assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
.addImm(AArch64SysReg::NZCV)
.addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
return;
}
#ifndef NDEBUG
const TargetRegisterInfo &TRI = getRegisterInfo();
errs() << TRI.getRegAsmName(DestReg) << " = COPY "
<< TRI.getRegAsmName(SrcReg) << "\n";
#endif
llvm_unreachable("unimplemented reg-to-reg copy");
}
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const MCInstrDesc &MCID,
Register SrcReg, bool IsKill,
unsigned SubIdx0, unsigned SubIdx1, int FI,
MachineMemOperand *MMO) {
Register SrcReg0 = SrcReg;
Register SrcReg1 = SrcReg;
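// For a physical register we can name each half directly; for a virtual
// register the sub-register index stays on the operand and is resolved
// after register allocation.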
if (SrcReg.isPhysical()) {
SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
SubIdx0 = 0;
SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
SubIdx1 = 0;
}
BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
.addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
.addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
Register VReg) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
MCRegister PNRReg = MCRegister::NoRegister;
unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
Opc = AArch64::STRBui;
break;
case 2: {
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::STRHui;
else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_PXI;
StackID = TargetStackID::ScalableVector;
}
break;
}
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRWui;
if (SrcReg.isVirtual())
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
else
assert(SrcReg != AArch64::WSP);
} else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
Opc = AArch64::STRSui;
else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
Opc = AArch64::STR_PPXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 8:
if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRXui;
if (SrcReg.isVirtual())
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
else
assert(SrcReg != AArch64::SP);
} else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRDui;
} else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
get(AArch64::STPWi), SrcReg, isKill,
AArch64::sube32, AArch64::subo32, FI, MMO);
return;
}
break;
case 16:
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::STRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
get(AArch64::STPXi), SrcReg, isKill,
AArch64::sube64, AArch64::subo64, FI, MMO);
return;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Threev1d;
Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv1d;
Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d;
Offset = false;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Threev2d;
Offset = false;
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d;
Offset = false;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
}
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI);
if (Offset)
MI.addImm(0);
if (PNRReg.isValid())
MI.addDef(PNRReg, RegState::Implicit);
MI.addMemOperand(MMO);
}
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const MCInstrDesc &MCID,
Register DestReg, unsigned SubIdx0,
unsigned SubIdx1, int FI,
MachineMemOperand *MMO) {
Register DestReg0 = DestReg;
Register DestReg1 = DestReg;
bool IsUndef = true;
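// For a virtual register each half-def only partially writes the
// super-register, so mark the defs undef to avoid implying a
// read-modify-write of the other half.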
if (DestReg.isPhysical()) {
DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
SubIdx0 = 0;
DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
SubIdx1 = 0;
IsUndef = false;
}
BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
.addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
.addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
Register DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
Register VReg) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
unsigned StackID = TargetStackID::Default;
Register PNRReg = MCRegister::NoRegister;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRBui;
break;
case 2: {
bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRHui;
else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
if (IsPNR)
PNRReg = DestReg;
Opc = AArch64::LDR_PXI;
StackID = TargetStackID::ScalableVector;
}
break;
}
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRWui;
if (DestReg.isVirtual())
MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
else
assert(DestReg != AArch64::WSP);
} else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRSui;
else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDR_PPXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 8:
if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRXui;
if (DestReg.isVirtual())
MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
else
assert(DestReg != AArch64::SP);
} else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRDui;
} else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
get(AArch64::LDPWi), DestReg, AArch64::sube32,
AArch64::subo32, FI, MMO);
return;
}
break;
case 16:
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
get(AArch64::LDPXi), DestReg, AArch64::sube64,
AArch64::subo64, FI, MMO);
return;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Threev1d;
Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv1d;
Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d;
Offset = false;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Threev2d;
Offset = false;
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d;
Offset = false;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
}
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(DestReg, getDefRegState(true))
.addFrameIndex(FI);
if (Offset)
MI.addImm(0);
if (PNRReg.isValid() && !PNRReg.isVirtual())
MI.addDef(PNRReg, RegState::Implicit);
MI.addMemOperand(MMO);
-
- if (PNRReg.isValid() && PNRReg.isVirtual())
- BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
- .addReg(DestReg);
}
bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
const MachineInstr &UseMI,
const TargetRegisterInfo *TRI) {
return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
UseMI.getIterator()),
[TRI](const MachineInstr &I) {
return I.modifiesRegister(AArch64::NZCV, TRI) ||
I.readsRegister(AArch64::NZCV, TRI);
});
}
void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
// The smallest scalable element supported by scaled SVE addressing
// modes is a predicate, which is 2 scalable bytes in size. So the scalable
// byte offset must always be a multiple of 2.
assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
// VGSized offsets are divided by '2', because the VG register holds the
// number of 64-bit granules as opposed to 128-bit vector chunks, which is
// how the 'n' in e.g. MVT::nxv1i8 is modelled.
// So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
// VG = n * 2 and the DWARF offset must be VG * 8 bytes.
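// Worked example (illustrative): a StackOffset of 16 fixed + 34 scalable
// bytes decomposes to ByteSized = 16 and VGSized = 17, describing a
// runtime offset of 16 + 17 * VG bytes.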
ByteSized = Offset.getFixed();
VGSized = Offset.getScalable() / 2;
}
/// Decomposes this frame offset into the parts needed to describe it,
/// namely a fixed byte count plus counts of SVE predicate and data vectors.
/// For non-scalable offsets this is simply the byte size.
void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
int64_t &NumDataVectors) {
// The smallest scalable element supported by scaled SVE addressing
// modes is a predicate, which is 2 scalable bytes in size. So the scalable
// byte offset must always be a multiple of 2.
assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
NumBytes = Offset.getFixed();
NumDataVectors = 0;
NumPredicateVectors = Offset.getScalable() / 2;
// This method is used to get the offsets to adjust the frame offset.
// If the function requires ADDPL to be used and needs more than two ADDPL
// instructions, part of the offset is folded into NumDataVectors so that it
// uses ADDVL for part of it, reducing the number of ADDPL instructions.
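// E.g. (illustrative): a scalable offset of 144 bytes gives 72 predicate
// vectors; since 72 % 8 == 0 this folds to NumDataVectors = 9 and
// NumPredicateVectors = 0, so a single ADDVL is emitted instead of ADDPLs.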
if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
NumPredicateVectors > 62) {
NumDataVectors = NumPredicateVectors / 8;
NumPredicateVectors -= NumDataVectors * 8;
}
}
// Convenience function to create a DWARF expression for
// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
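// The emitted postfix DWARF sequence is:
//   DW_OP_consts NumBytes, DW_OP_plus,
//   DW_OP_consts NumVGScaledBytes, DW_OP_bregx VG 0, DW_OP_mul, DW_OP_plus
// (each term only when its operand is non-zero).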
static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
int NumVGScaledBytes, unsigned VG,
llvm::raw_string_ostream &Comment) {
uint8_t buffer[16];
if (NumBytes) {
Expr.push_back(dwarf::DW_OP_consts);
Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
Expr.push_back((uint8_t)dwarf::DW_OP_plus);
Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
}
if (NumVGScaledBytes) {
Expr.push_back((uint8_t)dwarf::DW_OP_consts);
Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
Expr.push_back(0);
Expr.push_back((uint8_t)dwarf::DW_OP_mul);
Expr.push_back((uint8_t)dwarf::DW_OP_plus);
Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
<< std::abs(NumVGScaledBytes) << " * VG";
}
}
// Creates an MCCFIInstruction:
// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
unsigned Reg,
const StackOffset &Offset) {
int64_t NumBytes, NumVGScaledBytes;
AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
NumVGScaledBytes);
std::string CommentBuffer;
llvm::raw_string_ostream Comment(CommentBuffer);
if (Reg == AArch64::SP)
Comment << "sp";
else if (Reg == AArch64::FP)
Comment << "fp";
else
Comment << printReg(Reg, &TRI);
// Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
SmallString<64> Expr;
unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
Expr.push_back(0);
appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
TRI.getDwarfRegNum(AArch64::VG, true), Comment);
// Wrap this into DW_CFA_def_cfa.
SmallString<64> DefCfaExpr;
DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
uint8_t buffer[16];
DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
DefCfaExpr.append(Expr.str());
return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
Comment.str());
}
MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
unsigned FrameReg, unsigned Reg,
const StackOffset &Offset,
bool LastAdjustmentWasScalable) {
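// A scalable component needs a full DWARF expression; a fixed offset from
// the current frame register only needs def_cfa_offset, and otherwise a
// plain def_cfa with the new register is enough.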
if (Offset.getScalable())
return createDefCFAExpression(TRI, Reg, Offset);
if (FrameReg == Reg && !LastAdjustmentWasScalable)
return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
}
MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
unsigned Reg,
const StackOffset &OffsetFromDefCFA) {
int64_t NumBytes, NumVGScaledBytes;
AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
// Non-scalable offsets can use DW_CFA_offset directly.
if (!NumVGScaledBytes)
return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
std::string CommentBuffer;
llvm::raw_string_ostream Comment(CommentBuffer);
Comment << printReg(Reg, &TRI) << " @ cfa";
// Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
SmallString<64> OffsetExpr;
appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
TRI.getDwarfRegNum(AArch64::VG, true), Comment);
// Wrap this into DW_CFA_expression
SmallString<64> CfaExpr;
CfaExpr.push_back(dwarf::DW_CFA_expression);
uint8_t buffer[16];
CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
CfaExpr.append(OffsetExpr.str());
return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
Comment.str());
}
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, int64_t Offset, unsigned Opc,
const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool NeedsWinCFI,
bool *HasWinCFI, bool EmitCFAOffset,
StackOffset CFAOffset, unsigned FrameReg) {
int Sign = 1;
unsigned MaxEncoding, ShiftSize;
switch (Opc) {
case AArch64::ADDXri:
case AArch64::ADDSXri:
case AArch64::SUBXri:
case AArch64::SUBSXri:
MaxEncoding = 0xfff;
ShiftSize = 12;
break;
case AArch64::ADDVL_XXI:
case AArch64::ADDPL_XXI:
case AArch64::ADDSVL_XXI:
case AArch64::ADDSPL_XXI:
MaxEncoding = 31;
ShiftSize = 0;
if (Offset < 0) {
MaxEncoding = 32;
Sign = -1;
Offset = -Offset;
}
break;
default:
llvm_unreachable("Unsupported opcode");
}
// `Offset` can be in bytes or in "scalable bytes".
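// ADDVL counts whole SVE vectors (16 scalable bytes each); ADDPL counts
// predicate registers (2 scalable bytes each), hence the scaling below.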
int VScale = 1;
if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
VScale = 16;
else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
VScale = 2;
// FIXME: If the offset won't fit in 24-bits, compute the offset into a
// scratch register. If DestReg is a virtual register, use it as the
// scratch register; otherwise, create a new virtual register (to be
// replaced by the scavenger at the end of PEI). That case can be optimized
// slightly if DestReg is SP which is always 16-byte aligned, so the scratch
// register can be loaded with offset%8 and the add/sub can use an extending
// instruction with LSL#3.
// Currently the function handles any offsets but generates a poor sequence
// of code.
// assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
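// The loop below splits the offset into encodable chunks. For the ADDXri
// family that is a 12-bit immediate, optionally shifted by LSL #12: e.g.
// (illustrative) 0x201004 is emitted as ADD #0x201, LSL #12, then ADD #4.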
Register TmpReg = DestReg;
if (TmpReg == AArch64::XZR)
TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
&AArch64::GPR64RegClass);
do {
uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
unsigned LocalShiftSize = 0;
if (ThisVal > MaxEncoding) {
ThisVal = ThisVal >> ShiftSize;
LocalShiftSize = ShiftSize;
}
assert((ThisVal >> ShiftSize) <= MaxEncoding &&
"Encoding cannot handle value that big");
Offset -= ThisVal << LocalShiftSize;
if (Offset == 0)
TmpReg = DestReg;
auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
.addReg(SrcReg)
.addImm(Sign * (int)ThisVal);
if (ShiftSize)
MBI = MBI.addImm(
AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
MBI = MBI.setMIFlag(Flag);
auto Change =
VScale == 1
? StackOffset::getFixed(ThisVal << LocalShiftSize)
: StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
CFAOffset += Change;
else
CFAOffset -= Change;
if (EmitCFAOffset && DestReg == TmpReg) {
MachineFunction &MF = *MBB.getParent();
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
unsigned CFIIndex = MF.addFrameInst(
createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(Flag);
}
if (NeedsWinCFI) {
assert(Sign == 1 && "SEH directives should always have a positive sign");
int Imm = (int)(ThisVal << LocalShiftSize);
if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
(SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
if (HasWinCFI)
*HasWinCFI = true;
if (Imm == 0)
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
else
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
.addImm(Imm)
.setMIFlag(Flag);
assert(Offset == 0 && "Expected remaining offset to be zero to "
"emit a single SEH directive");
} else if (DestReg == AArch64::SP) {
if (HasWinCFI)
*HasWinCFI = true;
assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
.addImm(Imm)
.setMIFlag(Flag);
}
}
SrcReg = TmpReg;
} while (Offset);
}
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg,
StackOffset Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool SetNZCV,
bool NeedsWinCFI, bool *HasWinCFI,
bool EmitCFAOffset, StackOffset CFAOffset,
unsigned FrameReg) {
// If a function is marked as arm_locally_streaming, then the runtime value of
// vscale in the prologue/epilogue is different from the runtime value of vscale
// in the function's body. To avoid having to consider multiple vscales,
// we can use `addsvl` to allocate any scalable stack-slots, which under
// most circumstances will be only locals, not callee-save slots.
const Function &F = MBB.getParent()->getFunction();
bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
int64_t Bytes, NumPredicateVectors, NumDataVectors;
AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
Offset, Bytes, NumPredicateVectors, NumDataVectors);
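// Emission order: the fixed byte part first, then ADDVL for whole data
// vectors, then ADDPL for the remaining predicate-sized part.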
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
"SP increment/decrement not 8-byte aligned");
unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
if (Bytes < 0) {
Bytes = -Bytes;
Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
}
emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
FrameReg);
CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
? StackOffset::getFixed(-Bytes)
: StackOffset::getFixed(Bytes);
SrcReg = DestReg;
FrameReg = DestReg;
}
assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
"SetNZCV not supported with SVE vectors");
assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
"WinCFI not supported with SVE vectors");
if (NumDataVectors) {
emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
CFAOffset, FrameReg);
CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
SrcReg = DestReg;
}
if (NumPredicateVectors) {
assert(DestReg != AArch64::SP && "Unaligned access to SP");
emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
CFAOffset, FrameReg);
}
}
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS, VirtRegMap *VRM) const {
// This is a bit of a hack. Consider this instruction:
//
// %0 = COPY %sp; GPR64all:%0
//
// We explicitly chose GPR64all for the virtual register so such a copy might
// be eliminated by RegisterCoalescer. However, that may not be possible, and
// %0 may even spill. We can't spill %sp, and since it is in the GPR64all
// register class, TargetInstrInfo::foldMemoryOperand() is going to try.
//
// To prevent that, we are going to constrain the %0 register class here.
if (MI.isFullCopy()) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
return nullptr;
}
if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
return nullptr;
}
// Nothing can be folded with a copy from/to NZCV.
if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
return nullptr;
}
// Handle the case where a copy is being spilled or filled but the source
// and destination register classes don't match. For example:
//
// %0 = COPY %xzr; GPR64common:%0
//
// In this case we can still safely fold away the COPY and generate the
// following spill code:
//
// STRXui %xzr, %stack.0
//
// This also eliminates spilled cross register class COPYs (e.g. between x and
// d regs) of the same size. For example:
//
// %0 = COPY %1; GPR64:%0, FPR64:%1
//
// will be filled as
//
// LDRDui %0, fi<#0>
//
// instead of
//
// LDRXui %Temp, fi<#0>
// %0 = FMOV %Temp
//
if (MI.isCopy() && Ops.size() == 1 &&
// Make sure we're only folding the explicit COPY defs/uses.
(Ops[0] == 0 || Ops[0] == 1)) {
bool IsSpill = Ops[0] == 0;
bool IsFill = !IsSpill;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
const MachineOperand &DstMO = MI.getOperand(0);
const MachineOperand &SrcMO = MI.getOperand(1);
Register DstReg = DstMO.getReg();
Register SrcReg = SrcMO.getReg();
// This is slightly expensive to compute for physical regs since
// getMinimalPhysRegClass is slow.
auto getRegClass = [&](unsigned Reg) {
return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
: TRI.getMinimalPhysRegClass(Reg);
};
if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
"Mismatched register size in non subreg COPY");
if (IsSpill)
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
getRegClass(SrcReg), &TRI, Register());
else
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
getRegClass(DstReg), &TRI, Register());
return &*--InsertPt;
}
// Handle cases like spilling def of:
//
// %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
//
// where the physical register source can be widened and stored to the full
// virtual reg destination stack slot, in this case producing:
//
// STRXui %xzr, %stack.0
//
if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
assert(SrcMO.getSubReg() == 0 &&
"Unexpected subreg on physical register");
storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
FrameIndex, &AArch64::GPR64RegClass, &TRI,
Register());
return &*--InsertPt;
}
// Handle cases like filling use of:
//
// %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
//
// where we can load the full virtual reg source stack slot into the subreg
// destination, in this case producing:
//
// LDRWui %0:sub_32<def,read-undef>, %stack.0
//
if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
const TargetRegisterClass *FillRC;
switch (DstMO.getSubReg()) {
default:
FillRC = nullptr;
break;
case AArch64::sub_32:
FillRC = &AArch64::GPR32RegClass;
break;
case AArch64::ssub:
FillRC = &AArch64::FPR32RegClass;
break;
case AArch64::dsub:
FillRC = &AArch64::FPR64RegClass;
break;
}
if (FillRC) {
assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
TRI.getRegSizeInBits(*FillRC) &&
"Mismatched regclass size on folded subreg COPY");
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
Register());
MachineInstr &LoadMI = *--InsertPt;
MachineOperand &LoadDst = LoadMI.getOperand(0);
assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
LoadDst.setSubReg(DstMO.getSubReg());
LoadDst.setIsUndef();
return &LoadMI;
}
}
}
// Cannot fold.
return nullptr;
}
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
StackOffset &SOffset,
bool *OutUseUnscaledOp,
unsigned *OutUnscaledOp,
int64_t *EmittableOffset) {
// Set output values in case of early exit.
if (EmittableOffset)
*EmittableOffset = 0;
if (OutUseUnscaledOp)
*OutUseUnscaledOp = false;
if (OutUnscaledOp)
*OutUnscaledOp = 0;
// Exit early for structured vector spills/fills as they can't take an
// immediate offset.
switch (MI.getOpcode()) {
default:
break;
case AArch64::LD1Rv1d:
case AArch64::LD1Rv2s:
case AArch64::LD1Rv2d:
case AArch64::LD1Rv4h:
case AArch64::LD1Rv4s:
case AArch64::LD1Rv8b:
case AArch64::LD1Rv8h:
case AArch64::LD1Rv16b:
case AArch64::LD1Twov2d:
case AArch64::LD1Threev2d:
case AArch64::LD1Fourv2d:
case AArch64::LD1Twov1d:
case AArch64::LD1Threev1d:
case AArch64::LD1Fourv1d:
case AArch64::ST1Twov2d:
case AArch64::ST1Threev2d:
case AArch64::ST1Fourv2d:
case AArch64::ST1Twov1d:
case AArch64::ST1Threev1d:
case AArch64::ST1Fourv1d:
case AArch64::ST1i8:
case AArch64::ST1i16:
case AArch64::ST1i32:
case AArch64::ST1i64:
case AArch64::IRG:
case AArch64::IRGstack:
case AArch64::STGloop:
case AArch64::STZGloop:
return AArch64FrameOffsetCannotUpdate;
}
// Get the min/max offset and the scale.
TypeSize ScaleValue(0U, false), Width(0U, false);
int64_t MinOff, MaxOff;
if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
MaxOff))
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
// Construct the complete offset.
bool IsMulVL = ScaleValue.isScalable();
unsigned Scale = ScaleValue.getKnownMinValue();
int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
const MachineOperand &ImmOpnd =
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
Offset += ImmOpnd.getImm() * Scale;
// If the offset doesn't match the scale, we rewrite the instruction to
// use the unscaled instruction instead. Likewise, if we have a negative
// offset and there is an unscaled op to use.
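// E.g. (illustrative): an LDRXui (scale 8) left with a residual offset of
// 20 bytes is rewritten to the unscaled LDURXi, which encodes 20 directly.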
std::optional<unsigned> UnscaledOp =
AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
if (useUnscaledOp &&
!AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
MaxOff))
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
Scale = ScaleValue.getKnownMinValue();
assert(IsMulVL == ScaleValue.isScalable() &&
"Unscaled opcode has different value for scalable");
int64_t Remainder = Offset % Scale;
assert(!(Remainder && useUnscaledOp) &&
"Cannot have remainder when using unscaled op");
assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
int64_t NewOffset = Offset / Scale;
if (MinOff <= NewOffset && NewOffset <= MaxOff)
Offset = Remainder;
else {
NewOffset = NewOffset < 0 ? MinOff : MaxOff;
Offset = Offset - (NewOffset * Scale);
}
if (EmittableOffset)
*EmittableOffset = NewOffset;
if (OutUseUnscaledOp)
*OutUseUnscaledOp = useUnscaledOp;
if (OutUnscaledOp && UnscaledOp)
*OutUnscaledOp = *UnscaledOp;
if (IsMulVL)
SOffset = StackOffset::get(SOffset.getFixed(), Offset);
else
SOffset = StackOffset::get(Offset, SOffset.getScalable());
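// The offset is fully legal only if nothing remains in SOffset after
// folding the emittable part into the instruction.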
return AArch64FrameOffsetCanUpdate |
(SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, StackOffset &Offset,
const AArch64InstrInfo *TII) {
unsigned Opcode = MI.getOpcode();
unsigned ImmIdx = FrameRegIdx + 1;
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
MI.getOperand(0).getReg(), FrameReg, Offset, TII,
MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
MI.eraseFromParent();
Offset = StackOffset();
return true;
}
int64_t NewOffset;
unsigned UnscaledOp;
bool UseUnscaledOp;
int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
&UnscaledOp, &NewOffset);
if (Status & AArch64FrameOffsetCanUpdate) {
if (Status & AArch64FrameOffsetIsLegal)
// Replace the FrameIndex with FrameReg.
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
if (UseUnscaledOp)
MI.setDesc(TII->get(UnscaledOp));
MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
return !Offset;
}
return false;
}
void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
DebugLoc DL;
BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
}
MCInst AArch64InstrInfo::getNop() const {
return MCInstBuilder(AArch64::HINT).addImm(0);
}
// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }
// True when Opc sets the condition flags (NZCV).
static bool isCombineInstrSettingFlag(unsigned Opc) {
switch (Opc) {
case AArch64::ADDSWrr:
case AArch64::ADDSWri:
case AArch64::ADDSXrr:
case AArch64::ADDSXri:
case AArch64::SUBSWrr:
case AArch64::SUBSXrr:
// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
case AArch64::SUBSWri:
case AArch64::SUBSXri:
return true;
default:
break;
}
return false;
}
// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
switch (Opc) {
case AArch64::ADDWrr:
case AArch64::ADDWri:
case AArch64::SUBWrr:
case AArch64::ADDSWrr:
case AArch64::ADDSWri:
case AArch64::SUBSWrr:
// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
case AArch64::SUBWri:
case AArch64::SUBSWri:
return true;
default:
break;
}
return false;
}
// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
switch (Opc) {
case AArch64::ADDXrr:
case AArch64::ADDXri:
case AArch64::SUBXrr:
case AArch64::ADDSXrr:
case AArch64::ADDSXri:
case AArch64::SUBSXrr:
// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
case AArch64::SUBXri:
case AArch64::SUBSXri:
case AArch64::ADDv8i8:
case AArch64::ADDv16i8:
case AArch64::ADDv4i16:
case AArch64::ADDv8i16:
case AArch64::ADDv2i32:
case AArch64::ADDv4i32:
case AArch64::SUBv8i8:
case AArch64::SUBv16i8:
case AArch64::SUBv4i16:
case AArch64::SUBv8i16:
case AArch64::SUBv2i32:
case AArch64::SUBv4i32:
return true;
default:
break;
}
return false;
}
// FP Opcodes that can be combined with a FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
default:
break;
case AArch64::FADDHrr:
case AArch64::FADDSrr:
case AArch64::FADDDrr:
case AArch64::FADDv4f16:
case AArch64::FADDv8f16:
case AArch64::FADDv2f32:
case AArch64::FADDv2f64:
case AArch64::FADDv4f32:
case AArch64::FSUBHrr:
case AArch64::FSUBSrr:
case AArch64::FSUBDrr:
case AArch64::FSUBv4f16:
case AArch64::FSUBv8f16:
case AArch64::FSUBv2f32:
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
// We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
// the target options or if FADD/FSUB has the contract fast-math flag.
return Options.UnsafeFPMath ||
Options.AllowFPOpFusion == FPOpFusion::Fast ||
Inst.getFlag(MachineInstr::FmContract);
}
return false;
}
// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}
//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
unsigned CombineOpc, unsigned ZeroReg = 0,
bool CheckZeroReg = false) {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineInstr *MI = nullptr;
if (MO.isReg() && MO.getReg().isVirtual())
MI = MRI.getUniqueVRegDef(MO.getReg());
// And it needs to be in the trace (otherwise, it won't have a depth).
if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
return false;
// It must be used only by the user we combine with.
if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
return false;
if (CheckZeroReg) {
assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
// The third input reg must be zero.
if (MI->getOperand(3).getReg() != ZeroReg)
return false;
}
if (isCombineInstrSettingFlag(CombineOpc) &&
MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
return false;
return true;
}
//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
unsigned MulOpc, unsigned ZeroReg) {
return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}
//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
unsigned MulOpc) {
return canCombine(MBB, MO, MulOpc);
}
// TODO: There are many more machine instruction opcodes to match:
// 1. Other data types (integer, vectors)
// 2. Other math / logic operations (xor, or)
// 3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
bool Invert) const {
if (Invert)
return false;
switch (Inst.getOpcode()) {
// == Floating-point types ==
// -- Floating-point instructions --
case AArch64::FADDHrr:
case AArch64::FADDSrr:
case AArch64::FADDDrr:
case AArch64::FMULHrr:
case AArch64::FMULSrr:
case AArch64::FMULDrr:
case AArch64::FMULX16:
case AArch64::FMULX32:
case AArch64::FMULX64:
// -- Advanced SIMD instructions --
case AArch64::FADDv4f16:
case AArch64::FADDv8f16:
case AArch64::FADDv2f32:
case AArch64::FADDv4f32:
case AArch64::FADDv2f64:
case AArch64::FMULv4f16:
case AArch64::FMULv8f16:
case AArch64::FMULv2f32:
case AArch64::FMULv4f32:
case AArch64::FMULv2f64:
case AArch64::FMULXv4f16:
case AArch64::FMULXv8f16:
case AArch64::FMULXv2f32:
case AArch64::FMULXv4f32:
case AArch64::FMULXv2f64:
// -- SVE instructions --
// Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
// in the SVE instruction set (though there are predicated ones).
case AArch64::FADD_ZZZ_H:
case AArch64::FADD_ZZZ_S:
case AArch64::FADD_ZZZ_D:
case AArch64::FMUL_ZZZ_H:
case AArch64::FMUL_ZZZ_S:
case AArch64::FMUL_ZZZ_D:
return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
(Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
Inst.getFlag(MachineInstr::MIFlag::FmNsz));
// == Integer types ==
// -- Base instructions --
// Opcodes MULWrr and MULXrr don't exist because
// `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
// `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
// The machine combiner does not support three-source-operand machine
// instructions, so we cannot reassociate MULs.
case AArch64::ADDWrr:
case AArch64::ADDXrr:
case AArch64::ANDWrr:
case AArch64::ANDXrr:
case AArch64::ORRWrr:
case AArch64::ORRXrr:
case AArch64::EORWrr:
case AArch64::EORXrr:
case AArch64::EONWrr:
case AArch64::EONXrr:
// -- Advanced SIMD instructions --
// Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
// in the Advanced SIMD instruction set.
case AArch64::ADDv8i8:
case AArch64::ADDv16i8:
case AArch64::ADDv4i16:
case AArch64::ADDv8i16:
case AArch64::ADDv2i32:
case AArch64::ADDv4i32:
case AArch64::ADDv1i64:
case AArch64::ADDv2i64:
case AArch64::MULv8i8:
case AArch64::MULv16i8:
case AArch64::MULv4i16:
case AArch64::MULv8i16:
case AArch64::MULv2i32:
case AArch64::MULv4i32:
case AArch64::ANDv8i8:
case AArch64::ANDv16i8:
case AArch64::ORRv8i8:
case AArch64::ORRv16i8:
case AArch64::EORv8i8:
case AArch64::EORv16i8:
// -- SVE instructions --
case AArch64::ADD_ZZZ_B:
case AArch64::ADD_ZZZ_H:
case AArch64::ADD_ZZZ_S:
case AArch64::ADD_ZZZ_D:
case AArch64::MUL_ZZZ_B:
case AArch64::MUL_ZZZ_H:
case AArch64::MUL_ZZZ_S:
case AArch64::MUL_ZZZ_D:
case AArch64::AND_ZZZ:
case AArch64::ORR_ZZZ:
case AArch64::EOR_ZZZ:
return true;
default:
return false;
}
}
/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
SmallVectorImpl<unsigned> &Patterns) {
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
if (!isCombineInstrCandidate(Opc))
return false;
if (isCombineInstrSettingFlag(Opc)) {
int Cmp_NZCV =
Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
// When NZCV is live, bail out.
if (Cmp_NZCV == -1)
return false;
unsigned NewOpc = convertToNonFlagSettingOpc(Root);
// When the opcode can't change, bail out.
// CHECKME: do we miss any cases for opcode conversion?
if (NewOpc == Opc)
return false;
Opc = NewOpc;
}
auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
unsigned Pattern) {
if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
Patterns.push_back(Pattern);
Found = true;
}
};
auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
Patterns.push_back(Pattern);
Found = true;
}
};
typedef AArch64MachineCombinerPattern MCP;
switch (Opc) {
default:
break;
case AArch64::ADDWrr:
assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
"ADDWrr does not have register operands");
setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
break;
case AArch64::ADDXrr:
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
break;
case AArch64::SUBWrr:
setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
break;
case AArch64::SUBXrr:
setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
break;
case AArch64::ADDWri:
setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
break;
case AArch64::ADDXri:
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
break;
case AArch64::SUBWri:
setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
break;
case AArch64::SUBXri:
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
break;
case AArch64::ADDv8i8:
setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
break;
case AArch64::ADDv16i8:
setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
break;
case AArch64::ADDv4i16:
setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
break;
case AArch64::ADDv8i16:
setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
break;
case AArch64::ADDv2i32:
setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
break;
case AArch64::ADDv4i32:
setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
break;
case AArch64::SUBv8i8:
setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
break;
case AArch64::SUBv16i8:
setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
break;
case AArch64::SUBv4i16:
setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
break;
case AArch64::SUBv8i16:
setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
break;
case AArch64::SUBv2i32:
setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
break;
case AArch64::SUBv4i32:
setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
break;
}
return Found;
}
/// Floating-Point Support
/// Find instructions that can be turned into fmadd.
static bool getFMAPatterns(MachineInstr &Root,
SmallVectorImpl<unsigned> &Patterns) {
if (!isCombineInstrCandidateFP(Root))
return false;
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
Patterns.push_back(Pattern);
return true;
}
return false;
};
typedef AArch64MachineCombinerPattern MCP;
switch (Root.getOpcode()) {
default:
assert(false && "Unsupported FP instruction in combiner\n");
break;
case AArch64::FADDHrr:
assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
"FADDHrr does not have register operands");
Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
break;
case AArch64::FADDSrr:
assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
"FADDSrr does not have register operands");
Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
break;
case AArch64::FADDDrr:
Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
break;
case AArch64::FADDv4f16:
Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
break;
case AArch64::FADDv8f16:
Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
break;
case AArch64::FADDv2f32:
Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
break;
case AArch64::FADDv2f64:
Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
break;
case AArch64::FADDv4f32:
Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
break;
case AArch64::FSUBHrr:
Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
break;
case AArch64::FSUBSrr:
Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
break;
case AArch64::FSUBDrr:
Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
break;
case AArch64::FSUBv4f16:
Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
break;
case AArch64::FSUBv8f16:
Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
break;
case AArch64::FSUBv2f32:
Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
break;
case AArch64::FSUBv2f64:
Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
break;
case AArch64::FSUBv4f32:
Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
break;
}
return Found;
}
static bool getFMULPatterns(MachineInstr &Root,
SmallVectorImpl<unsigned> &Patterns) {
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineOperand &MO = Root.getOperand(Operand);
MachineInstr *MI = nullptr;
if (MO.isReg() && MO.getReg().isVirtual())
MI = MRI.getUniqueVRegDef(MO.getReg());
// Ignore No-op COPYs in FMUL(COPY(DUP(..)))
if (MI && MI->getOpcode() == TargetOpcode::COPY &&
MI->getOperand(1).getReg().isVirtual())
MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
if (MI && MI->getOpcode() == Opcode) {
Patterns.push_back(Pattern);
return true;
}
return false;
};
typedef AArch64MachineCombinerPattern MCP;
switch (Root.getOpcode()) {
default:
return false;
case AArch64::FMULv2f32:
Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
break;
case AArch64::FMULv2f64:
Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
break;
case AArch64::FMULv4f16:
Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
break;
case AArch64::FMULv4f32:
Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
break;
case AArch64::FMULv8f16:
Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
break;
}
return Found;
}
static bool getFNEGPatterns(MachineInstr &Root,
SmallVectorImpl<unsigned> &Patterns) {
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
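// Folding FNEG(FMADD) into FNMADD requires the contract and nsz fast-math
// flags on both instructions, since the combined negate-and-fuse can
// change the sign of an exact zero result.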
auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
MachineOperand &MO = Root.getOperand(1);
MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
if (MI != nullptr && (MI->getOpcode() == Opcode) &&
MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
Root.getFlag(MachineInstr::MIFlag::FmContract) &&
Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
MI->getFlag(MachineInstr::MIFlag::FmContract) &&
MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
Patterns.push_back(Pattern);
return true;
}
return false;
};
switch (Opc) {
default:
break;
case AArch64::FNEGDr:
return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
case AArch64::FNEGSr:
return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
}
return false;
}
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
switch (Pattern) {
default:
break;
case AArch64MachineCombinerPattern::FMULADDH_OP1:
case AArch64MachineCombinerPattern::FMULADDH_OP2:
case AArch64MachineCombinerPattern::FMULSUBH_OP1:
case AArch64MachineCombinerPattern::FMULSUBH_OP2:
case AArch64MachineCombinerPattern::FMULADDS_OP1:
case AArch64MachineCombinerPattern::FMULADDS_OP2:
case AArch64MachineCombinerPattern::FMULSUBS_OP1:
case AArch64MachineCombinerPattern::FMULSUBS_OP2:
case AArch64MachineCombinerPattern::FMULADDD_OP1:
case AArch64MachineCombinerPattern::FMULADDD_OP2:
case AArch64MachineCombinerPattern::FMULSUBD_OP1:
case AArch64MachineCombinerPattern::FMULSUBD_OP2:
case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1:
case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1:
case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2:
case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2:
case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2:
case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2:
case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2:
case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
return true;
} // end switch (Pattern)
return false;
}
/// Find other MI combine patterns.
static bool getMiscPatterns(MachineInstr &Root,
SmallVectorImpl<unsigned> &Patterns) {
// A - (B + C) ==> (A - B) - C or (A - C) - B
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
switch (Opc) {
case AArch64::SUBWrr:
case AArch64::SUBSWrr:
case AArch64::SUBXrr:
case AArch64::SUBSXrr:
// Found candidate root.
break;
default:
return false;
}
if (isCombineInstrSettingFlag(Opc) &&
Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
-1)
return false;
if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
return true;
}
return false;
}
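// Illustrative sketch of the pattern found above (added for exposition;
// virtual register numbers are hypothetical):
//   %3 = ADDWrr %1, %2
//   %4 = SUBWrr %0, %3        ; A - (B + C)
// may later be rewritten by genSubAdd2SubSub() as either
//   %t = SUBWrr %0, %1
//   %4 = SUBWrr %t, %2        ; (A - B) - C   (SUBADD_OP1)
// or the symmetric form with %2 subtracted first (SUBADD_OP2), which can
// shorten the critical path when the ADD's operands are available early.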
CombinerObjective
AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
switch (Pattern) {
case AArch64MachineCombinerPattern::SUBADD_OP1:
case AArch64MachineCombinerPattern::SUBADD_OP2:
return CombinerObjective::MustReduceDepth;
default:
return TargetInstrInfo::getCombinerObjective(Pattern);
}
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Patterns vector. Patterns should be sorted in priority order, since
/// the pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) const {
// Integer patterns
if (getMaddPatterns(Root, Patterns))
return true;
// Floating point patterns
if (getFMULPatterns(Root, Patterns))
return true;
if (getFMAPatterns(Root, Patterns))
return true;
if (getFNEGPatterns(Root, Patterns))
return true;
// Other patterns
if (getMiscPatterns(Root, Patterns))
return true;
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
DoRegPressureReduce);
}
enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
/// F|MUL I=A,B,0
/// F|ADD R,I,C
/// ==> F|MADD R,A,B,C
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind the kind of fma instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
unsigned MaddOpc, const TargetRegisterClass *RC,
FMAInstKind kind = FMAInstKind::Default,
const Register *ReplacedAddend = nullptr) {
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
Register ResultReg = Root.getOperand(0).getReg();
Register SrcReg0 = MUL->getOperand(1).getReg();
bool Src0IsKill = MUL->getOperand(1).isKill();
Register SrcReg1 = MUL->getOperand(2).getReg();
bool Src1IsKill = MUL->getOperand(2).isKill();
Register SrcReg2;
bool Src2IsKill;
if (ReplacedAddend) {
// If we just generated a new addend, we must be its only use.
SrcReg2 = *ReplacedAddend;
Src2IsKill = true;
} else {
SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
}
if (ResultReg.isVirtual())
MRI.constrainRegClass(ResultReg, RC);
if (SrcReg0.isVirtual())
MRI.constrainRegClass(SrcReg0, RC);
if (SrcReg1.isVirtual())
MRI.constrainRegClass(SrcReg1, RC);
if (SrcReg2.isVirtual())
MRI.constrainRegClass(SrcReg2, RC);
MachineInstrBuilder MIB;
if (kind == FMAInstKind::Default)
MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
.addReg(SrcReg0, getKillRegState(Src0IsKill))
.addReg(SrcReg1, getKillRegState(Src1IsKill))
.addReg(SrcReg2, getKillRegState(Src2IsKill));
else if (kind == FMAInstKind::Indexed)
MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
.addReg(SrcReg2, getKillRegState(Src2IsKill))
.addReg(SrcReg0, getKillRegState(Src0IsKill))
.addReg(SrcReg1, getKillRegState(Src1IsKill))
.addImm(MUL->getOperand(3).getImm());
else if (kind == FMAInstKind::Accumulator)
MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
.addReg(SrcReg2, getKillRegState(Src2IsKill))
.addReg(SrcReg0, getKillRegState(Src0IsKill))
.addReg(SrcReg1, getKillRegState(Src1IsKill));
else
assert(false && "Invalid FMA instruction kind \n");
// Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
InsInstrs.push_back(MIB);
return MUL;
}
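// Note on operand order (an explanatory sketch, not part of the original
// source): the scalar Default form places the multiplicands first,
//   FMADD Sd, Sn, Sm, Sa      ; Sd = Sa + Sn * Sm
// whereas the vector Indexed and Accumulator forms take the addend as the
// tied first source operand,
//   FMLA Vd, Vn, Vm[lane]     ; Vd += Vn * Vm[lane]
// which is why SrcReg2 is added first for those two kinds above.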
static MachineInstr *
genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs) {
MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
unsigned Opc = 0;
const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
if (AArch64::FPR32RegClass.hasSubClassEq(RC))
Opc = AArch64::FNMADDSrrr;
else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
Opc = AArch64::FNMADDDrrr;
else
return nullptr;
Register ResultReg = Root.getOperand(0).getReg();
Register SrcReg0 = MAD->getOperand(1).getReg();
Register SrcReg1 = MAD->getOperand(2).getReg();
Register SrcReg2 = MAD->getOperand(3).getReg();
bool Src0IsKill = MAD->getOperand(1).isKill();
bool Src1IsKill = MAD->getOperand(2).isKill();
bool Src2IsKill = MAD->getOperand(3).isKill();
if (ResultReg.isVirtual())
MRI.constrainRegClass(ResultReg, RC);
if (SrcReg0.isVirtual())
MRI.constrainRegClass(SrcReg0, RC);
if (SrcReg1.isVirtual())
MRI.constrainRegClass(SrcReg1, RC);
if (SrcReg2.isVirtual())
MRI.constrainRegClass(SrcReg2, RC);
MachineInstrBuilder MIB =
BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
.addReg(SrcReg0, getKillRegState(Src0IsKill))
.addReg(SrcReg1, getKillRegState(Src1IsKill))
.addReg(SrcReg2, getKillRegState(Src2IsKill));
InsInstrs.push_back(MIB);
return MAD;
}
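// Example of the fold performed here (a sketch; vregs are hypothetical):
//   %2 = FMADDSrrr %a, %b, %c    ; a * b + c
//   %3 = FNEGSr %2               ; -(a * b + c)
// becomes
//   %3 = FNMADDSrrr %a, %b, %c   ; -(a * b) - c == -(a * b + c)
// so the negation is absorbed into a single fused multiply-add.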
/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static MachineInstr *
genIndexedMultiply(MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IdxDupOp, unsigned MulOpc,
const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
"Invalid index of FMUL operand");
MachineFunction &MF = *Root.getMF();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineInstr *Dup =
MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
if (Dup->getOpcode() == TargetOpcode::COPY)
Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
Register DupSrcReg = Dup->getOperand(1).getReg();
MRI.clearKillFlags(DupSrcReg);
MRI.constrainRegClass(DupSrcReg, RC);
unsigned DupSrcLane = Dup->getOperand(2).getImm();
unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
MachineOperand &MulOp = Root.getOperand(IdxMulOp);
Register ResultReg = Root.getOperand(0).getReg();
MachineInstrBuilder MIB;
MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
.add(MulOp)
.addReg(DupSrcReg)
.addImm(DupSrcLane);
InsInstrs.push_back(MIB);
return &Root;
}
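// Example (a sketch; vregs are hypothetical):
//   %d = DUPv4i32lane %vec, 1
//   %r = FMULv4f32 %x, %d
// becomes
//   %r = FMULv4i32_indexed %x, %vec, 1
// The DUP itself is not deleted here; if the new instruction was its only
// user it is expected to become dead and be cleaned up separately.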
/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
/// instructions.
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyAcc(
MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
FMAInstKind::Accumulator);
}
/// genNeg - Helper to generate an intermediate negation of the second operand
/// of Root
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
unsigned MnegOpc, const TargetRegisterClass *RC) {
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB =
BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB);
assert(InstrIdxForVirtReg.empty());
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
return NewVR;
}
/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyAccNeg(
MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
assert(IdxMulOpd == 1);
Register NewVR =
genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
FMAInstKind::Accumulator, &NewVR);
}
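// Sketch of the mul-sub lowering this enables (vregs are hypothetical):
//   %m = MULv4i16 %a, %b
//   %r = SUBv4i16 %m, %c         ; (a * b) - c
// becomes
//   %n = NEGv4i16 %c
//   %r = MLAv4i16 %n, %a, %b     ; -c + a * b
// i.e. the subtrahend is negated once and then accumulated into.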
/// genFusedMultiplyIdx - Helper to generate fused multiply (indexed)
/// instructions.
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyIdx(
MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
FMAInstKind::Indexed);
}
/// genFusedMultiplyIdxNeg - Helper to generate fused multiply (indexed)
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyIdxNeg(
MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
assert(IdxMulOpd == 1);
Register NewVR =
genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
FMAInstKind::Indexed, &NewVR);
}
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD immediate operand needs to be materialized in a
/// register:
/// MUL I=A,B,0
/// ADD R,I,Imm
/// ==> ORR V, ZR, Imm
/// ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
/// \param RC Register class of operands
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
const TargetRegisterClass *RC) {
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
Register ResultReg = Root.getOperand(0).getReg();
Register SrcReg0 = MUL->getOperand(1).getReg();
bool Src0IsKill = MUL->getOperand(1).isKill();
Register SrcReg1 = MUL->getOperand(2).getReg();
bool Src1IsKill = MUL->getOperand(2).isKill();
if (ResultReg.isVirtual())
MRI.constrainRegClass(ResultReg, RC);
if (SrcReg0.isVirtual())
MRI.constrainRegClass(SrcReg0, RC);
if (SrcReg1.isVirtual())
MRI.constrainRegClass(SrcReg1, RC);
if (Register::isVirtualRegister(VR))
MRI.constrainRegClass(VR, RC);
MachineInstrBuilder MIB =
BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
.addReg(SrcReg0, getKillRegState(Src0IsKill))
.addReg(SrcReg1, getKillRegState(Src1IsKill))
.addReg(VR);
// Insert the MADD
InsInstrs.push_back(MIB);
return MUL;
}
/// Do the following transformation
/// A - (B + C) ==> (A - B) - C
/// A - (B + C) ==> (A - C) - B
static void
genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
unsigned IdxOpd1,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
assert(IdxOpd1 == 1 || IdxOpd1 == 2);
unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
Register ResultReg = Root.getOperand(0).getReg();
Register RegA = Root.getOperand(1).getReg();
bool RegAIsKill = Root.getOperand(1).isKill();
Register RegB = AddMI->getOperand(IdxOpd1).getReg();
bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
Register NewVR = MRI.createVirtualRegister(MRI.getRegClass(RegA));
unsigned Opcode = Root.getOpcode();
if (Opcode == AArch64::SUBSWrr)
Opcode = AArch64::SUBWrr;
else if (Opcode == AArch64::SUBSXrr)
Opcode = AArch64::SUBXrr;
else
assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
"Unexpected instruction opcode.");
uint32_t Flags = Root.mergeFlagsWith(*AddMI);
Flags &= ~MachineInstr::NoSWrap;
Flags &= ~MachineInstr::NoUWrap;
MachineInstrBuilder MIB1 =
BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
.addReg(RegA, getKillRegState(RegAIsKill))
.addReg(RegB, getKillRegState(RegBIsKill))
.setMIFlags(Flags);
MachineInstrBuilder MIB2 =
BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
.addReg(NewVR, getKillRegState(true))
.addReg(RegC, getKillRegState(RegCIsKill))
.setMIFlags(Flags);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
InsInstrs.push_back(MIB1);
InsInstrs.push_back(MIB2);
DelInstrs.push_back(AddMI);
DelInstrs.push_back(&Root);
}
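// Why the wrap flags are cleared above (an illustrative example, signed
// 32-bit): with A = -2, B = INT32_MAX and C = -INT32_MAX, the original
//   A - (B + C) = -2 - 0 = -2
// never wraps, yet the reassociated intermediate
//   A - B = -2 - INT32_MAX
// overflows. Keeping NoSWrap/NoUWrap on the replacement instructions
// would therefore be unsound.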
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
MachineBasicBlock &MBB = *Root.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineInstr *MUL = nullptr;
const TargetRegisterClass *RC;
unsigned Opc;
switch (Pattern) {
default:
// Reassociate instructions.
TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
DelInstrs, InstrIdxForVirtReg);
return;
case AArch64MachineCombinerPattern::SUBADD_OP1:
// A - (B + C)
// ==> (A - B) - C
genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
InstrIdxForVirtReg);
return;
case AArch64MachineCombinerPattern::SUBADD_OP2:
// A - (B + C)
// ==> (A - C) - B
genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
InstrIdxForVirtReg);
return;
case AArch64MachineCombinerPattern::MULADDW_OP1:
case AArch64MachineCombinerPattern::MULADDX_OP1:
// MUL I=A,B,0
// ADD R,I,C
// ==> MADD R,A,B,C
// --- Create(MADD);
if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDW_OP2:
case AArch64MachineCombinerPattern::MULADDX_OP2:
// MUL I=A,B,0
// ADD R,C,I
// ==> MADD R,A,B,C
// --- Create(MADD);
if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDWI_OP1:
case AArch64MachineCombinerPattern::MULADDXI_OP1: {
// MUL I=A,B,0
// ADD R,I,Imm
// ==> MOV V, Imm
// ==> MADD R,A,B,V
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
ZeroReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
OrrOpc = AArch64::ORRXri;
OrrRC = &AArch64::GPR64spRegClass;
BitSize = 64;
ZeroReg = AArch64::XZR;
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
Register NewVR = MRI.createVirtualRegister(OrrRC);
uint64_t Imm = Root.getOperand(2).getImm();
if (Root.getOperand(3).isImm()) {
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
uint64_t UImm = SignExtend64(Imm, BitSize);
// Check whether the immediate can be composed via a single instruction.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
if (Insn.size() != 1)
return;
auto MovI = Insn.begin();
MachineInstrBuilder MIB1;
// MOV is an alias for one of three instructions: movz, movn, and orr.
if (MovI->Opcode == OrrOpc)
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
.addReg(ZeroReg)
.addImm(MovI->Op2);
else {
if (BitSize == 32)
assert((MovI->Opcode == AArch64::MOVNWi ||
MovI->Opcode == AArch64::MOVZWi) &&
"Expected opcode");
else
assert((MovI->Opcode == AArch64::MOVNXi ||
MovI->Opcode == AArch64::MOVZXi) &&
"Expected opcode");
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
.addImm(MovI->Op1)
.addImm(MovI->Op2);
}
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
case AArch64MachineCombinerPattern::MULSUBW_OP1:
case AArch64MachineCombinerPattern::MULSUBX_OP1: {
// MUL I=A,B,0
// SUB R,I, C
// ==> SUB V, 0, C
// ==> MADD R,A,B,V // = -C + A*B
// --- Create(MADD);
const TargetRegisterClass *SubRC;
unsigned SubOpc, ZeroReg;
if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
SubOpc = AArch64::SUBWrr;
SubRC = &AArch64::GPR32spRegClass;
ZeroReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
SubOpc = AArch64::SUBXrr;
SubRC = &AArch64::GPR64spRegClass;
ZeroReg = AArch64::XZR;
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
Register NewVR = MRI.createVirtualRegister(SubRC);
// SUB NewVR, 0, C
MachineInstrBuilder MIB1 =
BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
.addReg(ZeroReg)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
case AArch64MachineCombinerPattern::MULSUBW_OP2:
case AArch64MachineCombinerPattern::MULSUBX_OP2:
// MUL I=A,B,0
// SUB R,C,I
// ==> MSUB R,A,B,C (computes C - A*B)
// --- Create(MSUB);
if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
Opc = AArch64::MSUBWrrr;
RC = &AArch64::GPR32RegClass;
} else {
Opc = AArch64::MSUBXrrr;
RC = &AArch64::GPR64RegClass;
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBWI_OP1:
case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
// MUL I=A,B,0
// SUB R,I, Imm
// ==> MOV V, -Imm
// ==> MADD R,A,B,V // = -Imm + A*B
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
ZeroReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
OrrOpc = AArch64::ORRXri;
OrrRC = &AArch64::GPR64spRegClass;
BitSize = 64;
ZeroReg = AArch64::XZR;
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
Register NewVR = MRI.createVirtualRegister(OrrRC);
uint64_t Imm = Root.getOperand(2).getImm();
if (Root.getOperand(3).isImm()) {
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
uint64_t UImm = SignExtend64(-Imm, BitSize);
// Check whether the immediate can be composed via a single instruction.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
if (Insn.size() != 1)
return;
auto MovI = Insn.begin();
MachineInstrBuilder MIB1;
// MOV is an alias for one of three instructions: movz, movn, and orr.
if (MovI->Opcode == OrrOpc)
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
.addReg(ZeroReg)
.addImm(MovI->Op2);
else {
if (BitSize == 32)
assert((MovI->Opcode == AArch64::MOVNWi ||
MovI->Opcode == AArch64::MOVZWi) &&
"Expected opcode");
else
assert((MovI->Opcode == AArch64::MOVNXi ||
MovI->Opcode == AArch64::MOVZXi) &&
"Expected opcode");
MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
.addImm(MovI->Op1)
.addImm(MovI->Op2);
}
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
Opc = AArch64::MLAv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
Opc = AArch64::MLAv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
Opc = AArch64::MLAv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
Opc = AArch64::MLAv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
Opc = AArch64::MLAv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
Opc = AArch64::MLAv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
Opc = AArch64::MLAv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
Opc = AArch64::MLAv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
Opc = AArch64::MLAv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
Opc = AArch64::MLAv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
RC);
break;
case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
Opc = AArch64::MLSv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
Opc = AArch64::MLAv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
RC);
break;
case AArch64MachineCombinerPattern::MULSUBv16i8_OP2:
Opc = AArch64::MLSv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBv4i16_OP1:
Opc = AArch64::MLAv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
RC);
break;
case AArch64MachineCombinerPattern::MULSUBv4i16_OP2:
Opc = AArch64::MLSv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBv8i16_OP1:
Opc = AArch64::MLAv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
RC);
break;
case AArch64MachineCombinerPattern::MULSUBv8i16_OP2:
Opc = AArch64::MLSv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBv2i32_OP1:
Opc = AArch64::MLAv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
RC);
break;
case AArch64MachineCombinerPattern::MULSUBv2i32_OP2:
Opc = AArch64::MLSv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBv4i32_OP1:
Opc = AArch64::MLAv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
RC);
break;
case AArch64MachineCombinerPattern::MULSUBv4i32_OP2:
Opc = AArch64::MLSv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP1:
Opc = AArch64::MLAv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv4i16_indexed_OP2:
Opc = AArch64::MLAv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP1:
Opc = AArch64::MLAv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv8i16_indexed_OP2:
Opc = AArch64::MLAv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP1:
Opc = AArch64::MLAv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv2i32_indexed_OP2:
Opc = AArch64::MLAv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP1:
Opc = AArch64::MLAv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULADDv4i32_indexed_OP2:
Opc = AArch64::MLAv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
Opc = AArch64::MLAv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
RC);
break;
case AArch64MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
Opc = AArch64::MLSv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
Opc = AArch64::MLAv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
RC);
break;
case AArch64MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
Opc = AArch64::MLSv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
Opc = AArch64::MLAv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
RC);
break;
case AArch64MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
Opc = AArch64::MLSv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
Opc = AArch64::MLAv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
RC);
break;
case AArch64MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
Opc = AArch64::MLSv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
// Floating Point Support
case AArch64MachineCombinerPattern::FMULADDH_OP1:
Opc = AArch64::FMADDHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMULADDS_OP1:
Opc = AArch64::FMADDSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMULADDD_OP1:
Opc = AArch64::FMADDDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMULADDH_OP2:
Opc = AArch64::FMADDHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMULADDS_OP2:
Opc = AArch64::FMADDSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMULADDD_OP2:
Opc = AArch64::FMADDDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP1:
Opc = AArch64::FMLAv1i32_indexed;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLAv1i32_indexed_OP2:
Opc = AArch64::FMLAv1i32_indexed;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP1:
Opc = AArch64::FMLAv1i64_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLAv1i64_indexed_OP2:
Opc = AArch64::FMLAv1i64_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP1:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLAv4f16_OP1:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
break;
case AArch64MachineCombinerPattern::FMLAv4i16_indexed_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLAv4f16_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1:
case AArch64MachineCombinerPattern::FMLAv2f32_OP1:
RC = &AArch64::FPR64RegClass;
if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
Opc = AArch64::FMLAv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv2f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2:
case AArch64MachineCombinerPattern::FMLAv2f32_OP2:
RC = &AArch64::FPR64RegClass;
if (Pattern == AArch64MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
Opc = AArch64::FMLAv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv2f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP1:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLAv8f16_OP1:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
break;
case AArch64MachineCombinerPattern::FMLAv8i16_indexed_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLAv8f16_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1:
case AArch64MachineCombinerPattern::FMLAv2f64_OP1:
RC = &AArch64::FPR128RegClass;
if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
Opc = AArch64::FMLAv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv2f64;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2:
case AArch64MachineCombinerPattern::FMLAv2f64_OP2:
RC = &AArch64::FPR128RegClass;
if (Pattern == AArch64MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
Opc = AArch64::FMLAv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv2f64;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1:
case AArch64MachineCombinerPattern::FMLAv4f32_OP1:
RC = &AArch64::FPR128RegClass;
if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
Opc = AArch64::FMLAv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv4f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2:
case AArch64MachineCombinerPattern::FMLAv4f32_OP2:
RC = &AArch64::FPR128RegClass;
if (Pattern == AArch64MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
Opc = AArch64::FMLAv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv4f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case AArch64MachineCombinerPattern::FMULSUBH_OP1:
Opc = AArch64::FNMSUBHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMULSUBS_OP1:
Opc = AArch64::FNMSUBSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMULSUBD_OP1:
Opc = AArch64::FNMSUBDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::FNMULSUBH_OP1:
Opc = AArch64::FNMADDHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::FNMULSUBS_OP1:
Opc = AArch64::FNMADDSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::FNMULSUBD_OP1:
Opc = AArch64::FNMADDDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMULSUBH_OP2:
Opc = AArch64::FMSUBHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMULSUBS_OP2:
Opc = AArch64::FMSUBSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMULSUBD_OP2:
Opc = AArch64::FMSUBDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case AArch64MachineCombinerPattern::FMLSv1i32_indexed_OP2:
Opc = AArch64::FMLSv1i32_indexed;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLSv1i64_indexed_OP2:
Opc = AArch64::FMLSv1i64_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLSv4f16_OP1:
case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
RC = &AArch64::FPR64RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
if (Pattern == AArch64MachineCombinerPattern::FMLSv4f16_OP1) {
Opc = AArch64::FMLAv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
} else {
Opc = AArch64::FMLAv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
}
break;
}
case AArch64MachineCombinerPattern::FMLSv4f16_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLSv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
case AArch64MachineCombinerPattern::FMLSv4i16_indexed_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLSv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLSv2f32_OP2:
case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2:
RC = &AArch64::FPR64RegClass;
if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
Opc = AArch64::FMLSv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLSv2f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case AArch64MachineCombinerPattern::FMLSv8f16_OP1:
case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
if (Pattern == AArch64MachineCombinerPattern::FMLSv8f16_OP1) {
Opc = AArch64::FMLAv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
} else {
Opc = AArch64::FMLAv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
}
break;
}
case AArch64MachineCombinerPattern::FMLSv8f16_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLSv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
case AArch64MachineCombinerPattern::FMLSv8i16_indexed_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLSv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case AArch64MachineCombinerPattern::FMLSv2f64_OP2:
case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2:
RC = &AArch64::FPR128RegClass;
if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
Opc = AArch64::FMLSv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLSv2f64;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case AArch64MachineCombinerPattern::FMLSv4f32_OP2:
case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2:
RC = &AArch64::FPR128RegClass;
if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
Opc = AArch64::FMLSv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLSv4f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case AArch64MachineCombinerPattern::FMLSv2f32_OP1:
case AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
RC = &AArch64::FPR64RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
if (Pattern == AArch64MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
Opc = AArch64::FMLAv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
} else {
Opc = AArch64::FMLAv2f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
}
break;
}
case AArch64MachineCombinerPattern::FMLSv4f32_OP1:
case AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
if (Pattern == AArch64MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
Opc = AArch64::FMLAv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
} else {
Opc = AArch64::FMLAv4f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
}
break;
}
case AArch64MachineCombinerPattern::FMLSv2f64_OP1:
case AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
if (Pattern == AArch64MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
Opc = AArch64::FMLAv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
} else {
Opc = AArch64::FMLAv2f64;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
}
break;
}
case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1:
case AArch64MachineCombinerPattern::FMULv2i32_indexed_OP2: {
unsigned IdxDupOp =
(Pattern == AArch64MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1
: 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
&AArch64::FPR128RegClass, MRI);
break;
}
case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1:
case AArch64MachineCombinerPattern::FMULv2i64_indexed_OP2: {
unsigned IdxDupOp =
(Pattern == AArch64MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1
: 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
&AArch64::FPR128RegClass, MRI);
break;
}
case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1:
case AArch64MachineCombinerPattern::FMULv4i16_indexed_OP2: {
unsigned IdxDupOp =
(Pattern == AArch64MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1
: 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
&AArch64::FPR128_loRegClass, MRI);
break;
}
case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1:
case AArch64MachineCombinerPattern::FMULv4i32_indexed_OP2: {
unsigned IdxDupOp =
(Pattern == AArch64MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1
: 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
&AArch64::FPR128RegClass, MRI);
break;
}
case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1:
case AArch64MachineCombinerPattern::FMULv8i16_indexed_OP2: {
unsigned IdxDupOp =
(Pattern == AArch64MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1
: 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
&AArch64::FPR128_loRegClass, MRI);
break;
}
case AArch64MachineCombinerPattern::FNMADD: {
MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
break;
}
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
if (MUL)
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
// Set the flags on the inserted instructions to be the merged flags of the
// instructions that we have combined.
uint32_t Flags = Root.getFlags();
if (MUL)
Flags = Root.mergeFlagsWith(*MUL);
for (auto *MI : InsInstrs)
MI->setFlags(Flags);
}
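// End-to-end sketch for the simplest integer pattern, MULADDW_OP1 (vregs
// are hypothetical; MUL appears in MIR as MADD with a zero addend):
//   %i = MADDWrrr %a, %b, $wzr   ; I = A * B
//   %r = ADDWrr %i, %c           ; R = I + C
// getMachineCombinerPatterns() reports MULADDW_OP1 and this function emits
//   %r = MADDWrrr %a, %b, %c
// while recording the old MUL and ADD in DelInstrs for removal.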
/// Replace csincr-branch sequence by simple conditional branch
///
/// Examples:
/// 1. \code
/// csinc w9, wzr, wzr, <condition code>
/// tbnz w9, #0, 0x44
/// \endcode
/// to
/// \code
/// b.<inverted condition code>
/// \endcode
///
/// 2. \code
/// csinc w9, wzr, wzr, <condition code>
/// tbz w9, #0, 0x44
/// \endcode
/// to
/// \code
/// b.<condition code>
/// \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
///
/// Examples:
/// \code
/// and w8, w8, #0x400
/// cbnz w8, L1
/// \endcode
/// to
/// \code
/// tbnz w8, #10, L1
/// \endcode
///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
bool IsNegativeBranch = false;
bool IsTestAndBranch = false;
unsigned TargetBBInMI = 0;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unknown branch instruction?");
case AArch64::Bcc:
return false;
case AArch64::CBZW:
case AArch64::CBZX:
TargetBBInMI = 1;
break;
case AArch64::CBNZW:
case AArch64::CBNZX:
TargetBBInMI = 1;
IsNegativeBranch = true;
break;
case AArch64::TBZW:
case AArch64::TBZX:
TargetBBInMI = 2;
IsTestAndBranch = true;
break;
case AArch64::TBNZW:
case AArch64::TBNZX:
TargetBBInMI = 2;
IsNegativeBranch = true;
IsTestAndBranch = true;
break;
}
// So we increment a zero register and test for bits other
// than bit 0? Conservatively bail out in case the verifier
// missed this case.
if (IsTestAndBranch && MI.getOperand(1).getImm())
return false;
// Find Definition.
assert(MI.getParent() && "Incomplete machine instruction\n");
MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
Register VReg = MI.getOperand(0).getReg();
if (!VReg.isVirtual())
return false;
MachineInstr *DefMI = MRI->getVRegDef(VReg);
// Look through COPY instructions to find definition.
while (DefMI->isCopy()) {
Register CopyVReg = DefMI->getOperand(1).getReg();
if (!MRI->hasOneNonDBGUse(CopyVReg))
return false;
if (!MRI->hasOneDef(CopyVReg))
return false;
DefMI = MRI->getVRegDef(CopyVReg);
}
switch (DefMI->getOpcode()) {
default:
return false;
// Fold AND into a TBZ/TBNZ if constant operand is power of 2.
case AArch64::ANDWri:
case AArch64::ANDXri: {
if (IsTestAndBranch)
return false;
if (DefMI->getParent() != MBB)
return false;
if (!MRI->hasOneNonDBGUse(VReg))
return false;
bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
if (!isPowerOf2_64(Mask))
return false;
MachineOperand &MO = DefMI->getOperand(1);
Register NewReg = MO.getReg();
if (!NewReg.isVirtual())
return false;
assert(!MRI->def_empty(NewReg) && "Register must be defined.");
MachineBasicBlock &RefToMBB = *MBB;
MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
DebugLoc DL = MI.getDebugLoc();
unsigned Imm = Log2_64(Mask);
unsigned Opc = (Imm < 32)
? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
: (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
.addReg(NewReg)
.addImm(Imm)
.addMBB(TBB);
// Register lives on into the new TBZ/TBNZ now.
MO.setIsKill(false);
// For an immediate smaller than 32, we need to use the 32-bit (W)
// variant in all cases, since the 64-bit variant cannot encode
// such bit positions. Therefore, if the input register is 64-bit,
// we need to take its 32-bit sub-register.
if (!Is32Bit && Imm < 32)
NewMI->getOperand(0).setSubReg(AArch64::sub_32);
MI.eraseFromParent();
return true;
}
// Look for CSINC
case AArch64::CSINCWr:
case AArch64::CSINCXr: {
if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
DefMI->getOperand(2).getReg() == AArch64::WZR) &&
!(DefMI->getOperand(1).getReg() == AArch64::XZR &&
DefMI->getOperand(2).getReg() == AArch64::XZR))
return false;
if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
true) != -1)
return false;
AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
// Convert only when the condition code is not modified between
// the CSINC and the branch. The CC may be used by other
// instructions in between.
if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
return false;
MachineBasicBlock &RefToMBB = *MBB;
MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
DebugLoc DL = MI.getDebugLoc();
if (IsNegativeBranch)
CC = AArch64CC::getInvertedCondCode(CC);
BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
MI.eraseFromParent();
return true;
}
}
}
std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
const unsigned Mask = AArch64II::MO_FRAGMENT;
return std::make_pair(TF & Mask, TF & ~Mask);
}
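// Example (a sketch): for a flag word combining a fragment with a bitmask
// modifier, e.g. MO_PAGEOFF | MO_NC, this returns the pair
//   {MO_PAGEOFF, MO_NC}
// since MO_FRAGMENT masks the mutually exclusive page/offset/G*/HI12
// kinds, while bits outside the mask (NC, GOT, TLS, ...) are ORable.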
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
{MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
{MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
{MO_HI12, "aarch64-hi12"}};
return ArrayRef(TargetFlags);
}
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_COFFSTUB, "aarch64-coffstub"},
{MO_GOT, "aarch64-got"},
{MO_NC, "aarch64-nc"},
{MO_S, "aarch64-s"},
{MO_TLS, "aarch64-tls"},
{MO_DLLIMPORT, "aarch64-dllimport"},
{MO_PREL, "aarch64-prel"},
{MO_TAGGED, "aarch64-tagged"},
{MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
};
return ArrayRef(TargetFlags);
}
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
{{MOSuppressPair, "aarch64-suppress-pair"},
{MOStridedAccess, "aarch64-strided-access"}};
return ArrayRef(TargetFlags);
}
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1 Save LR OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 Restore LR I2
/// I3
/// RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION I1
/// RET I2
/// RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 I2
/// I3
/// RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// BL f I2
/// B f
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 0
/// * Requires stack fixups? No
///
/// \p MachineOutlinerRegSave implies that the function should be called with a
/// save and restore of LR to an available register. This allows us to avoid
/// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case.
///
/// That is,
///
/// I1 Save LR OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 Restore LR I2
/// I3
/// RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? No
enum MachineOutlinerClass {
MachineOutlinerDefault, /// Emit a save, restore, call, and return.
MachineOutlinerTailCall, /// Only emit a branch.
MachineOutlinerNoLRSave, /// Emit a call and return.
MachineOutlinerThunk, /// Emit a call and tail-call.
MachineOutlinerRegSave /// Same as default, but save to a register.
};
enum MachineOutlinerMBBFlags {
LRUnavailableSomewhere = 0x2,
HasCalls = 0x4,
UnsafeRegsDead = 0x8
};
Register
AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
MachineFunction *MF = C.getMF();
const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
const AArch64RegisterInfo *ARI =
static_cast<const AArch64RegisterInfo *>(&TRI);
// Check if there is an available register across the sequence that we can
// use.
for (unsigned Reg : AArch64::GPR64RegClass) {
if (!ARI->isReservedReg(*MF, Reg) &&
Reg != AArch64::LR && // LR is not reserved, but don't use it.
Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
Reg != AArch64::X17 && // Ditto for X17.
C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
C.isAvailableInsideSeq(Reg, TRI))
return Reg;
}
return Register();
}
static bool
outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
}
static bool
outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
}
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
const AArch64Subtarget &SubtargetA =
a.getMF()->getSubtarget<AArch64Subtarget>();
const AArch64Subtarget &SubtargetB =
b.getMF()->getSubtarget<AArch64Subtarget>();
return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
}
std::optional<outliner::OutlinedFunction>
AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
unsigned SequenceSize = 0;
for (auto &MI : RepeatedSequenceLocs[0])
SequenceSize += getInstSizeInBytes(MI);
unsigned NumBytesToCreateFrame = 0;
// We only allow outlining for functions having exactly matching return
// address signing attributes, i.e., all share the same value for the
// attribute "sign-return-address" and all share the same type of key they
// are signed with.
// Additionally we require all functions to simultaneously either support
// v8.3a features or not. Otherwise an outlined function could get signed
// using dedicated v8.3 instructions and a call from a function that doesn't
// support v8.3 instructions would therefore be invalid.
if (std::adjacent_find(
RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
[](const outliner::Candidate &a, const outliner::Candidate &b) {
// Return true if a and b are non-equal w.r.t. return address
// signing or support of v8.3a features
if (outliningCandidatesSigningScopeConsensus(a, b) &&
outliningCandidatesSigningKeyConsensus(a, b) &&
outliningCandidatesV8_3OpsConsensus(a, b)) {
return false;
}
return true;
}) != RepeatedSequenceLocs.end()) {
return std::nullopt;
}
// Since at this point all candidates agree on their return address signing,
// picking just one is fine. If the candidate functions potentially sign their
// return addresses, the outlined function should do the same. Note that in
// the case of "sign-return-address"="non-leaf" this is an assumption: it is
// not certain that the outlined function will have to sign its return
// address, but this decision is made later, when the decision to outline
// has already been made.
// The same holds for the number of additional instructions we need: On
// v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
// necessary. However, at this point we don't know if the outlined function
// will have a RET instruction so we assume the worst.
const TargetRegisterInfo &TRI = getRegisterInfo();
// Performing a tail call may require extra checks when PAuth is enabled.
// If PAuth is disabled, set it to zero for uniformity.
unsigned NumBytesToCheckLRInTCEpilogue = 0;
if (RepeatedSequenceLocs[0]
.getMF()
->getInfo<AArch64FunctionInfo>()
->shouldSignReturnAddress(true)) {
// One PAC and one AUT instruction
NumBytesToCreateFrame += 8;
// PAuth is enabled - set extra tail call cost, if any.
auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
*RepeatedSequenceLocs[0].getMF());
NumBytesToCheckLRInTCEpilogue =
AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
// Checking the authenticated LR value may significantly impact
// SequenceSize, so account for it for more precise results.
if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
SequenceSize += NumBytesToCheckLRInTCEpilogue;
// We have to check if SP-modifying instructions would get outlined. If so,
// we only allow outlining if SP is unchanged overall: matching sub and add
// instructions are okay to outline; all other SP modifications are not.
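// For example, this matched pair nets to zero and may be outlined:
//   sub sp, sp, #16
//   ...
//   add sp, sp, #16
// whereas an unmatched "sub sp, sp, #16", or an SP write whose source is
// not SP itself, makes the candidate illegal.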
auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
int SPValue = 0;
for (auto &MI : C) {
if (MI.modifiesRegister(AArch64::SP, &TRI)) {
switch (MI.getOpcode()) {
case AArch64::ADDXri:
case AArch64::ADDWri:
assert(MI.getNumOperands() == 4 && "Wrong number of operands");
assert(MI.getOperand(2).isImm() &&
"Expected operand to be immediate");
assert(MI.getOperand(1).isReg() &&
"Expected operand to be a register");
// Check if the add just increments sp. If so, we search for
// matching sub instructions that decrement sp. If not, the
// modification is illegal
if (MI.getOperand(1).getReg() == AArch64::SP)
SPValue += MI.getOperand(2).getImm();
else
return true;
break;
case AArch64::SUBXri:
case AArch64::SUBWri:
assert(MI.getNumOperands() == 4 && "Wrong number of operands");
assert(MI.getOperand(2).isImm() &&
"Expected operand to be immediate");
assert(MI.getOperand(1).isReg() &&
"Expected operand to be a register");
// Check if the sub just decrements sp. If so, we search for
// matching add instructions that increment sp. If not, the
// modification is illegal
if (MI.getOperand(1).getReg() == AArch64::SP)
SPValue -= MI.getOperand(2).getImm();
else
return true;
break;
default:
return true;
}
}
}
if (SPValue)
return true;
return false;
};
// Remove candidates with illegal stack modifying instructions
llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
return std::nullopt;
}
// Properties about candidate MBBs that hold for all of them.
unsigned FlagsSetInAll = 0xF;
// Compute liveness information for each candidate, and set FlagsSetInAll.
for (outliner::Candidate &C : RepeatedSequenceLocs)
FlagsSetInAll &= C.Flags;
unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
// Helper lambda which sets call information for every candidate.
auto SetCandidateCallInfo =
[&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
for (outliner::Candidate &C : RepeatedSequenceLocs)
C.setCallInfo(CallID, NumBytesForCall);
};
unsigned FrameID = MachineOutlinerDefault;
NumBytesToCreateFrame += 4;
bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
});
// We check to see if CFI instructions are present, and if they are,
// we find the number of CFI instructions in the candidates.
unsigned CFICount = 0;
for (auto &I : RepeatedSequenceLocs[0]) {
if (I.isCFIInstruction())
CFICount++;
}
// We compare the number of found CFI Instructions to the number of CFI
// instructions in the parent function for each candidate. We must check this
// since if we outline one of the CFI instructions in a function, we have to
// outline them all for correctness. If we do not, the address offsets will be
// incorrect between the two sections of the program.
for (outliner::Candidate &C : RepeatedSequenceLocs) {
std::vector<MCCFIInstruction> CFIInstructions =
C.getMF()->getFrameInstructions();
if (CFICount > 0 && CFICount != CFIInstructions.size())
return std::nullopt;
}
// Returns true if an instruction is safe to fix up, false otherwise.
auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
if (MI.isCall())
return true;
if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
!MI.readsRegister(AArch64::SP, &TRI))
return true;
// Any modification of SP will break our code to save/restore LR.
// FIXME: We could handle some instructions which add a constant
// offset to SP, with a bit more work.
if (MI.modifiesRegister(AArch64::SP, &TRI))
return false;
// At this point, we have a stack instruction that we might need to
// fix up. We'll handle it if it's a load or store.
if (MI.mayLoadOrStore()) {
const MachineOperand *Base; // Filled with the base operand of MI.
int64_t Offset; // Filled with the offset of MI.
bool OffsetIsScalable;
// Does it allow us to offset the base operand and is the base the
// register SP?
if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
!Base->isReg() || Base->getReg() != AArch64::SP)
return false;
// Fix-up code below assumes byte offsets.
if (OffsetIsScalable)
return false;
// Find the minimum/maximum offset for this instruction and check
// if fixing it up would be in range.
int64_t MinOffset,
MaxOffset; // Unscaled offsets for the instruction.
// The scale to multiply the offsets by.
TypeSize Scale(0U, false), DummyWidth(0U, false);
getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
Offset += 16; // Update the offset to what it would be if we outlined.
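// E.g. an "ldr x0, [sp, #8]" (LDRXui: Scale 8, scaled-immediate range
// [0, 4095]) becomes "ldr x0, [sp, #24]" after the LR save moves SP down
// by 16; 24 is still representable, so the fixup is safe.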
if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
Offset > MaxOffset * (int64_t)Scale.getFixedValue())
return false;
// It's in range, so we can outline it.
return true;
}
// FIXME: Add handling for instructions like "add x0, sp, #8".
// We can't fix it up, so don't outline it.
return false;
};
// True if it's possible to fix up each stack instruction in this sequence.
// Important for frames/call variants that modify the stack.
bool AllStackInstrsSafe =
llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
// If the last instruction in any candidate is a terminator, then we should
// tail call all of the candidates.
if (RepeatedSequenceLocs[0].back().isTerminator()) {
FrameID = MachineOutlinerTailCall;
NumBytesToCreateFrame = 0;
unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
}
else if (LastInstrOpcode == AArch64::BL ||
((LastInstrOpcode == AArch64::BLR ||
LastInstrOpcode == AArch64::BLRNoIP) &&
!HasBTI)) {
// FIXME: Do we need to check if the code after this uses the value of LR?
FrameID = MachineOutlinerThunk;
NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
SetCandidateCallInfo(MachineOutlinerThunk, 4);
}
else {
// We need to decide how to emit calls + frames. We can always emit the same
// frame if we don't need to save to the stack. If we have to save to the
// stack, then we need a different frame.
unsigned NumBytesNoStackCalls = 0;
std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
// Check if we have to save LR.
for (outliner::Candidate &C : RepeatedSequenceLocs) {
bool LRAvailable =
(C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
: true;
// If we have a noreturn caller, then we're going to be conservative and
// say that we have to save LR. If we don't have a ret at the end of the
// block, then we can't reason about liveness accurately.
//
// FIXME: We can probably do better than always disabling this in
// noreturn functions by fixing up the liveness info.
bool IsNoReturn =
C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
// Is LR available? If so, we don't need a save.
if (LRAvailable && !IsNoReturn) {
NumBytesNoStackCalls += 4;
C.setCallInfo(MachineOutlinerNoLRSave, 4);
CandidatesWithoutStackFixups.push_back(C);
}
// Is an unused register available? If so, we won't modify the stack, so
// we can outline with the same frame type as those that don't save LR.
else if (findRegisterToSaveLRTo(C)) {
NumBytesNoStackCalls += 12;
C.setCallInfo(MachineOutlinerRegSave, 12);
CandidatesWithoutStackFixups.push_back(C);
}
// Is SP used in the sequence at all? If not, we don't have to modify
// the stack, so we are guaranteed to get the same frame.
else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
NumBytesNoStackCalls += 12;
C.setCallInfo(MachineOutlinerDefault, 12);
CandidatesWithoutStackFixups.push_back(C);
}
// If we outline this, we need to modify the stack. Pretend we don't
// outline this by saving all of its bytes.
else {
NumBytesNoStackCalls += SequenceSize;
}
}
// If there are no places where we have to save LR, then note that we
// don't have to update the stack. Otherwise, give every candidate the
// default call type, as long as it's safe to do so.
if (!AllStackInstrsSafe ||
NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
RepeatedSequenceLocs = CandidatesWithoutStackFixups;
FrameID = MachineOutlinerNoLRSave;
if (RepeatedSequenceLocs.size() < 2)
return std::nullopt;
} else {
SetCandidateCallInfo(MachineOutlinerDefault, 12);
// Bugzilla ID: 46767
// TODO: Check if fixing up the stack more than once is safe so we can
// outline these.
//
// An outline resulting in a caller that requires stack fixups at the
// callsite to a callee that also requires stack fixups can happen when
// there are no available registers at the candidate callsite for a
// candidate that itself also has calls.
//
// In other words, if function_containing_sequence in the following pseudo-
// assembly requires that we save LR at the point of the call, but there
// are no available registers, we save using SP. As a result, the SP
// offsets require stack fixups in multiples of 16.
//
// function_containing_sequence:
// ...
// save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
// call OUTLINED_FUNCTION_N
// restore LR from SP
// ...
//
// OUTLINED_FUNCTION_N:
// save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
// ...
// bl foo
// restore LR from SP
// ret
//
// Because the code to handle more than one stack fixup does not
// currently have the proper checks for legality, these cases will assert
// in the AArch64 MachineOutliner. This is because the code to do this
// needs more hardening, testing, better checks that generated code is
// legal, etc., and because it is only verified to handle a single pass of
// stack fixup.
//
// The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
// these cases until they are known to be handled. Bugzilla 46767 is
// referenced in comments at the assert site.
//
// To avoid asserting (or generating illegal code on no-assert builds)
// we remove all candidates which would need more than one stack fixup by
// pruning the cases where the candidate has calls while also having no
// available LR and no available general purpose registers to copy LR
// to (i.e., one extra stack save/restore).
//
if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
return (llvm::any_of(C, IsCall)) &&
(!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
!findRegisterToSaveLRTo(C));
});
}
}
// If we dropped all of the candidates, bail out here.
if (RepeatedSequenceLocs.size() < 2) {
RepeatedSequenceLocs.clear();
return std::nullopt;
}
}
// Does every candidate's MBB contain a call? If so, then we might have a call
// in the range.
if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
// Check if the range contains a call. These require a save + restore of the
// link register.
outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
bool ModStackToSaveLR = false;
if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
[](const MachineInstr &MI) { return MI.isCall(); }))
ModStackToSaveLR = true;
// Handle the last instruction separately. If this is a tail call, then the
// last instruction is a call. We don't want to save + restore in this case.
// However, it could be possible that the last instruction is a call without
// it being valid to tail call this sequence. We should consider this as
// well.
else if (FrameID != MachineOutlinerThunk &&
FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
ModStackToSaveLR = true;
if (ModStackToSaveLR) {
// We can't fix up the stack. Bail out.
if (!AllStackInstrsSafe) {
RepeatedSequenceLocs.clear();
return std::nullopt;
}
// Save + restore LR.
NumBytesToCreateFrame += 8;
}
}
// If we have CFI instructions, we can only outline if the outlined section
// can be a tail call.
if (FrameID != MachineOutlinerTailCall && CFICount > 0)
return std::nullopt;
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
NumBytesToCreateFrame, FrameID);
}
void AArch64InstrInfo::mergeOutliningCandidateAttributes(
Function &F, std::vector<outliner::Candidate> &Candidates) const {
// If a bunch of candidates reach this point, they must agree on their return
// address signing. It is therefore enough to just consider the signing
// behaviour of one of them.
const auto &CFn = Candidates.front().getMF()->getFunction();
if (CFn.hasFnAttribute("ptrauth-returns"))
F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
if (CFn.hasFnAttribute("ptrauth-auth-traps"))
F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
// Since all candidates belong to the same module, just copy the
// function-level attributes of an arbitrary function.
if (CFn.hasFnAttribute("sign-return-address"))
F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
if (CFn.hasFnAttribute("sign-return-address-key"))
F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
}
bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
const Function &F = MF.getFunction();
// Can F be deduplicated by the linker? If it can, don't outline from it.
if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
return false;
// Don't outline from functions with section markings; the program could
// expect that all the code is in the named section.
// FIXME: Allow outlining from multiple functions with the same section
// marking.
if (F.hasSection())
return false;
// Outlining from functions with redzones is unsafe since the outliner may
// modify the stack. Check if hasRedZone is true or unknown; if yes, don't
// outline from it.
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (!AFI || AFI->hasRedZone().value_or(true))
return false;
// FIXME: Determine whether it is safe to outline from functions which contain
// streaming-mode changes. We may need to ensure any smstart/smstop pairs are
// outlined together and ensure it is safe to outline with async unwind info,
// required for saving & restoring VG around calls.
if (AFI->hasStreamingModeChanges())
return false;
// FIXME: Teach the outliner to generate/handle Windows unwind info.
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
return false;
// It's safe to outline from MF.
return true;
}
SmallVector<std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
unsigned &Flags) const {
assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
"Must track liveness!");
SmallVector<
std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
Ranges;
// According to the AArch64 Procedure Call Standard, the following are
// undefined on entry/exit from a function call:
//
// * Registers x16, x17, (and thus w16, w17)
// * Condition codes (and thus the NZCV register)
//
// If any of these registers are used inside or live across an outlined
// function, then they may be modified later, either by the compiler or
// some other tool (like the linker).
//
// To avoid outlining in these situations, partition each block into ranges
// where these registers are dead. We will only outline from those ranges.
LiveRegUnits LRU(getRegisterInfo());
auto AreAllUnsafeRegsDead = [&LRU]() {
return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
LRU.available(AArch64::NZCV);
};
// We need to know if LR is live across an outlining boundary later on in
// order to decide how we'll create the outlined call, frame, etc.
//
// It's pretty expensive to check this for *every candidate* within a block.
// That's some potentially n^2 behaviour, since in the worst case, we'd need
// to compute liveness from the end of the block for O(n) candidates within
// the block.
//
// So, to improve the average case, let's keep track of liveness from the end
// of the block to the beginning of *every outlinable range*. If we know that
// LR is available in every range we could outline from, then we know that
// we don't need to check liveness for any candidate within that range.
bool LRAvailableEverywhere = true;
// Compute liveness bottom-up.
LRU.addLiveOuts(MBB);
// Update flags that require info about the entire MBB.
auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
if (MI.isCall() && !MI.isTerminator())
Flags |= MachineOutlinerMBBFlags::HasCalls;
};
// Range: [RangeBegin, RangeEnd)
MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
unsigned RangeLen;
auto CreateNewRangeStartingAt =
[&RangeBegin, &RangeEnd,
&RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
RangeBegin = NewBegin;
RangeEnd = std::next(RangeBegin);
RangeLen = 0;
};
auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
// At least one unsafe register is not dead. We do not want to outline at
// this point. If it is long enough to outline from, save the range
// [RangeBegin, RangeEnd).
if (RangeLen > 1)
Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
};
// Find the first point where all unsafe registers are dead.
// FIND: <safe instr> <-- end of first potential range
// SKIP: <unsafe def>
// SKIP: ... everything between ...
// SKIP: <unsafe use>
auto FirstPossibleEndPt = MBB.instr_rbegin();
for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
LRU.stepBackward(*FirstPossibleEndPt);
// Update flags that impact how we outline across the entire block,
// regardless of safety.
UpdateWholeMBBFlags(*FirstPossibleEndPt);
if (AreAllUnsafeRegsDead())
break;
}
// If we exhausted the entire block, we have no safe ranges to outline.
if (FirstPossibleEndPt == MBB.instr_rend())
return Ranges;
// Current range.
CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
// FirstPossibleEndPt points to the first place (scanning bottom-up) where
// all unsafe registers are dead (if there is any such point). Begin
// partitioning the MBB into ranges.
for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
LRU.stepBackward(MI);
UpdateWholeMBBFlags(MI);
if (!AreAllUnsafeRegsDead()) {
SaveRangeIfNonEmpty();
CreateNewRangeStartingAt(MI.getIterator());
continue;
}
LRAvailableEverywhere &= LRU.available(AArch64::LR);
RangeBegin = MI.getIterator();
++RangeLen;
}
// The loop above misses the last (or only) range. If we are still safe, then
// let's save the range.
if (AreAllUnsafeRegsDead())
SaveRangeIfNonEmpty();
if (Ranges.empty())
return Ranges;
// We found the ranges bottom-up, but the mapping expects them top-down, so
// reverse the order.
std::reverse(Ranges.begin(), Ranges.end());
// If there is at least one outlinable range where LR is unavailable
// somewhere, remember that.
if (!LRAvailableEverywhere)
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
return Ranges;
}
outliner::InstrType
AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
unsigned Flags) const {
MachineInstr &MI = *MIT;
MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
// Don't outline anything used for return address signing. The outlined
// function will get signed later if needed.
switch (MI.getOpcode()) {
case AArch64::PACM:
case AArch64::PACIASP:
case AArch64::PACIBSP:
case AArch64::PACIASPPC:
case AArch64::PACIBSPPC:
case AArch64::AUTIASP:
case AArch64::AUTIBSP:
case AArch64::AUTIASPPCi:
case AArch64::AUTIASPPCr:
case AArch64::AUTIBSPPCi:
case AArch64::AUTIBSPPCr:
case AArch64::RETAA:
case AArch64::RETAB:
case AArch64::RETAASPPCi:
case AArch64::RETAASPPCr:
case AArch64::RETABSPPCi:
case AArch64::RETABSPPCr:
case AArch64::EMITBKEY:
case AArch64::PAUTH_PROLOGUE:
case AArch64::PAUTH_EPILOGUE:
return outliner::InstrType::Illegal;
}
// Don't outline LOHs.
if (FuncInfo->getLOHRelated().count(&MI))
return outliner::InstrType::Illegal;
// We can only outline these if we will tail call the outlined function, or
// fix up the CFI offsets. Currently, CFI instructions are outlined only if
// in a tail call.
//
// FIXME: If the proper fixups for the offset are implemented, this should be
// possible.
if (MI.isCFIInstruction())
return outliner::InstrType::Legal;
// Is this a terminator for a basic block?
if (MI.isTerminator())
// TargetInstrInfo::getOutliningType has already filtered out anything
// that would break this, so we can allow it here.
return outliner::InstrType::Legal;
// Make sure none of the operands are un-outlinable.
for (const MachineOperand &MOP : MI.operands()) {
// A check preventing CFI indices was here before, but only CFI
// instructions should have those.
assert(!MOP.isCFIIndex());
// If it uses LR or W30 explicitly, then don't touch it.
if (MOP.isReg() && !MOP.isImplicit() &&
(MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
return outliner::InstrType::Illegal;
}
// Special cases for instructions that can always be outlined, but will fail
// the later tests. E.g. ADRPs, which are PC-relative, can always be outlined
// because they don't require a *specific* value to be in LR.
if (MI.getOpcode() == AArch64::ADRP)
return outliner::InstrType::Legal;
// If MI is a call we might be able to outline it. We don't want to outline
// any calls that rely on the position of items on the stack. When we outline
// something containing a call, we have to emit a save and restore of LR in
// the outlined function. Currently, this always happens by saving LR to the
// stack. Thus, if we outline, say, half the parameters for a function call
// plus the call, then we'll break the callee's expectations for the layout
// of the stack.
//
// FIXME: Allow calls to functions which construct a stack frame, as long
// as they don't access arguments on the stack.
// FIXME: Figure out some way to analyze functions defined in other modules.
// We should be able to compute the memory usage based on the IR calling
// convention, even if we can't see the definition.
if (MI.isCall()) {
// Get the function associated with the call. Look at each operand and find
// the one that represents the callee and get its name.
const Function *Callee = nullptr;
for (const MachineOperand &MOP : MI.operands()) {
if (MOP.isGlobal()) {
Callee = dyn_cast<Function>(MOP.getGlobal());
break;
}
}
// Never outline calls to mcount. There isn't any rule that would require
// this, but the Linux kernel's "ftrace" feature depends on it.
if (Callee && Callee->getName() == "\01_mcount")
return outliner::InstrType::Illegal;
// If we don't know anything about the callee, assume it depends on the
// stack layout of the caller. In that case, it's only legal to outline
// as a tail-call. Explicitly list the call instructions we know about so we
// don't get unexpected results with call pseudo-instructions.
auto UnknownCallOutlineType = outliner::InstrType::Illegal;
if (MI.getOpcode() == AArch64::BLR ||
MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
if (!Callee)
return UnknownCallOutlineType;
// We have a function we have information about. Check if it's something we
// can safely outline.
MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
// We don't know what's going on with the callee at all. Don't touch it.
if (!CalleeMF)
return UnknownCallOutlineType;
// Check if we know anything about the callee saves on the function. If we
// don't, then don't touch it, since that implies that we haven't
// computed anything about its stack frame yet.
MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
MFI.getNumObjects() > 0)
return UnknownCallOutlineType;
// At this point, we can say that CalleeMF ought to not pass anything on the
// stack. Therefore, we can outline it.
return outliner::InstrType::Legal;
}
// Don't touch the link register or W30.
if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
return outliner::InstrType::Illegal;
// Don't outline BTI instructions, because that will prevent the outlining
// site from being indirectly callable.
if (hasBTISemantics(MI))
return outliner::InstrType::Illegal;
return outliner::InstrType::Legal;
}
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : MBB) {
const MachineOperand *Base;
TypeSize Width(0, false);
int64_t Offset;
bool OffsetIsScalable;
// Is this a load or store with an immediate offset with SP as the base?
if (!MI.mayLoadOrStore() ||
!getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
&RI) ||
(Base->isReg() && Base->getReg() != AArch64::SP))
continue;
// It is, so we have to fix it up.
TypeSize Scale(0U, false);
int64_t Dummy1, Dummy2;
MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
assert(Scale != 0 && "Unexpected opcode!");
assert(!OffsetIsScalable && "Expected offset to be a byte offset");
// We've pushed the return address to the stack, so add 16 to the offset.
// This is safe, since we already checked if it would overflow when we
// checked if this instruction was legal to outline.
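// E.g. an LDRXui at [sp, #8] has Scale 8, so its new scaled immediate is
// (8 + 16) / 8 == 3, i.e. the access becomes [sp, #24].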
int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
StackOffsetOperand.setImm(NewImm);
}
}
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
const AArch64InstrInfo *TII,
bool ShouldSignReturnAddr) {
if (!ShouldSignReturnAddr)
return;
BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
TII->get(AArch64::PAUTH_EPILOGUE))
.setMIFlag(MachineInstr::FrameDestroy);
}
void AArch64InstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
if (OF.FrameConstructionID == MachineOutlinerTailCall)
FI->setOutliningStyle("Tail Call");
else if (OF.FrameConstructionID == MachineOutlinerThunk) {
// For thunk outlining, rewrite the last instruction from a call to a
// tail-call.
MachineInstr *Call = &*--MBB.instr_end();
unsigned TailOpcode;
if (Call->getOpcode() == AArch64::BL) {
TailOpcode = AArch64::TCRETURNdi;
} else {
assert(Call->getOpcode() == AArch64::BLR ||
Call->getOpcode() == AArch64::BLRNoIP);
TailOpcode = AArch64::TCRETURNriALL;
}
MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
.add(Call->getOperand(0))
.addImm(0);
MBB.insert(MBB.end(), TC);
Call->eraseFromParent();
FI->setOutliningStyle("Thunk");
}
bool IsLeafFunction = true;
// Is there a call in the outlined range?
auto IsNonTailCall = [](const MachineInstr &MI) {
return MI.isCall() && !MI.isReturn();
};
if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
// Fix up the instructions in the range, since we're going to modify the
// stack.
// Bugzilla ID: 46767
// TODO: Check if fixing up twice is safe so we can outline these.
assert(OF.FrameConstructionID != MachineOutlinerDefault &&
"Can only fix up stack references once");
fixupPostOutline(MBB);
IsLeafFunction = false;
// LR has to be a live in so that we can save it.
if (!MBB.isLiveIn(AArch64::LR))
MBB.addLiveIn(AArch64::LR);
MachineBasicBlock::iterator It = MBB.begin();
MachineBasicBlock::iterator Et = MBB.end();
if (OF.FrameConstructionID == MachineOutlinerTailCall ||
OF.FrameConstructionID == MachineOutlinerThunk)
Et = std::prev(MBB.end());
// Insert a save before the outlined region
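// "str lr, [sp, #-16]!": push LR and move SP down by 16 bytes.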
MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::SP)
.addImm(-16);
It = MBB.insert(It, STRXpre);
if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
const TargetSubtargetInfo &STI = MF.getSubtarget();
const MCRegisterInfo *MRI = STI.getRegisterInfo();
unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
// Add a CFI saying the stack was moved 16 B down.
int64_t StackPosEntry =
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(StackPosEntry)
.setMIFlags(MachineInstr::FrameSetup);
// Add a CFI saying that the LR that we want to find is now 16 B higher
// than before.
int64_t LRPosEntry = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(LRPosEntry)
.setMIFlags(MachineInstr::FrameSetup);
}
// Insert a restore before the terminator for the function.
MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::SP)
.addImm(16);
Et = MBB.insert(Et, LDRXpost);
}
bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
// If this is a tail call outlined function, then there's already a return.
if (OF.FrameConstructionID == MachineOutlinerTailCall ||
OF.FrameConstructionID == MachineOutlinerThunk) {
signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
return;
}
// It's not a tail call, so we have to insert the return ourselves.
// LR has to be a live in so that we can return to it.
if (!MBB.isLiveIn(AArch64::LR))
MBB.addLiveIn(AArch64::LR);
MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
.addReg(AArch64::LR);
MBB.insert(MBB.end(), ret);
signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
FI->setOutliningStyle("Function");
// Did we have to modify the stack by saving the link register?
if (OF.FrameConstructionID != MachineOutlinerDefault)
return;
// We modified the stack.
// Walk over the basic block and fix up all the stack accesses.
fixupPostOutline(MBB);
}
MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
MachineFunction &MF, outliner::Candidate &C) const {
// Are we tail calling?
if (C.CallConstructionID == MachineOutlinerTailCall) {
// If yes, then we can just branch to the label.
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
.addGlobalAddress(M.getNamedValue(MF.getName()))
.addImm(0));
return It;
}
// Are we saving the link register?
if (C.CallConstructionID == MachineOutlinerNoLRSave ||
C.CallConstructionID == MachineOutlinerThunk) {
// No, so just insert the call.
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
.addGlobalAddress(M.getNamedValue(MF.getName())));
return It;
}
// We want to return the spot where we inserted the call.
MachineBasicBlock::iterator CallPt;
// Instructions for saving and restoring LR around the call instruction we're
// going to insert.
MachineInstr *Save;
MachineInstr *Restore;
// Can we save to a register?
if (C.CallConstructionID == MachineOutlinerRegSave) {
// FIXME: This logic should be sunk into a target-specific interface so that
// we don't have to recompute the register.
Register Reg = findRegisterToSaveLRTo(C);
assert(Reg && "No callee-saved register available?");
// LR has to be a live in so that we can save it.
if (!MBB.isLiveIn(AArch64::LR))
MBB.addLiveIn(AArch64::LR);
// Save and restore LR from Reg.
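// ORRXrs with XZR encodes a plain register move: the save below is
// "mov Reg, lr" and the restore is "mov lr, Reg".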
Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
.addReg(AArch64::XZR)
.addReg(AArch64::LR)
.addImm(0);
Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
.addReg(AArch64::XZR)
.addReg(Reg)
.addImm(0);
} else {
// We have the default case. Save and restore from SP.
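// That is, "str lr, [sp, #-16]!" before the call and "ldr lr, [sp], #16"
// after it.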
Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::SP)
.addImm(-16);
Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::SP)
.addImm(16);
}
It = MBB.insert(It, Save);
It++;
// Insert the call.
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
.addGlobalAddress(M.getNamedValue(MF.getName())));
CallPt = It;
It++;
It = MBB.insert(It, Restore);
return CallPt;
}
bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
MachineFunction &MF) const {
return MF.getFunction().hasMinSize();
}
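/// Zero out \p Reg. A sketch of what this emits: "movz xN, #0" for a general
/// purpose register, an SVE "mov zN.d, #0" (DUP_ZI_D) when SVE is available,
/// and "movi vN.2d, #0" otherwise.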
void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
MachineBasicBlock::iterator Iter,
DebugLoc &DL,
bool AllowSideEffects) const {
const MachineFunction &MF = *MBB.getParent();
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
if (TRI.isGeneralPurposeRegister(MF, Reg)) {
BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
} else if (STI.hasSVE()) {
BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
.addImm(0)
.addImm(0);
} else {
BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
.addImm(0);
}
}
std::optional<DestSourcePair>
AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
// AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register and a
// zero immediate operand are used as an alias for the mov instruction.
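// E.g. "$x0 = ORRXrs $xzr, $x1, 0" is the canonical form of "mov x0, x1".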
if (MI.getOpcode() == AArch64::ORRWrs &&
MI.getOperand(1).getReg() == AArch64::WZR &&
MI.getOperand(3).getImm() == 0x0 &&
// Check that the w->w move is not a zero-extending w->x mov.
(!MI.getOperand(0).getReg().isVirtual() ||
MI.getOperand(0).getSubReg() == 0) &&
(!MI.getOperand(0).getReg().isPhysical() ||
MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
AArch64::X0,
/*TRI=*/nullptr) == -1))
return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
if (MI.getOpcode() == AArch64::ORRXrs &&
MI.getOperand(1).getReg() == AArch64::XZR &&
MI.getOperand(3).getImm() == 0x0)
return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
return std::nullopt;
}
std::optional<DestSourcePair>
AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
if (MI.getOpcode() == AArch64::ORRWrs &&
MI.getOperand(1).getReg() == AArch64::WZR &&
MI.getOperand(3).getImm() == 0x0)
return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
return std::nullopt;
}
std::optional<RegImmPair>
AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
int Sign = 1;
int64_t Offset = 0;
// TODO: Handle cases where Reg is a super- or sub-register of the
// destination register.
const MachineOperand &Op0 = MI.getOperand(0);
if (!Op0.isReg() || Reg != Op0.getReg())
return std::nullopt;
switch (MI.getOpcode()) {
default:
return std::nullopt;
case AArch64::SUBWri:
case AArch64::SUBXri:
case AArch64::SUBSWri:
case AArch64::SUBSXri:
Sign *= -1;
[[fallthrough]];
case AArch64::ADDSWri:
case AArch64::ADDSXri:
case AArch64::ADDWri:
case AArch64::ADDXri: {
// TODO: Third operand can be global address (usually some string).
if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
!MI.getOperand(2).isImm())
return std::nullopt;
int Shift = MI.getOperand(3).getImm();
assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
Offset = Sign * (MI.getOperand(2).getImm() << Shift);
}
}
return RegImmPair{MI.getOperand(1).getReg(), Offset};
}
/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
/// the destination register then, if possible, describe the value in terms of
/// the source register.
static std::optional<ParamLoadedValue>
describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
auto DestSrc = TII->isCopyLikeInstr(MI);
if (!DestSrc)
return std::nullopt;
Register DestReg = DestSrc->Destination->getReg();
Register SrcReg = DestSrc->Source->getReg();
auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
// If the described register is the destination, just return the source.
if (DestReg == DescribedReg)
return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
// ORRWrs zero-extends to 64-bits, so we need to consider such cases.
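// E.g. "$w1 = ORRWrs $wzr, $w0, 0" also zeroes the top half of $x1, so a
// query for $x1 can still be described in terms of the 32-bit source $w0.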
if (MI.getOpcode() == AArch64::ORRWrs &&
TRI->isSuperRegister(DestReg, DescribedReg))
return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
// We may need to describe the lower part of a ORRXrs move.
if (MI.getOpcode() == AArch64::ORRXrs &&
TRI->isSubRegister(DestReg, DescribedReg)) {
Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
}
assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
"Unhandled ORR[XW]rs copy case");
return std::nullopt;
}
bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
// Functions cannot be split to different sections on AArch64 if they have
// a red zone. This is because relaxing a cross-section branch may require
// incrementing the stack pointer to spill a register, which would overwrite
// the red zone.
if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
return false;
return TargetInstrInfo::isFunctionSafeToSplit(MF);
}
bool AArch64InstrInfo::isMBBSafeToSplitToCold(
const MachineBasicBlock &MBB) const {
// Asm Goto blocks can contain conditional branches to goto labels, which can
// get moved out of range of the branch instruction.
auto isAsmGoto = [](const MachineInstr &MI) {
return MI.getOpcode() == AArch64::INLINEASM_BR;
};
if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
return false;
// Because jump tables are label-relative instead of table-relative, they all
// must be in the same section or relocation fixup handling will fail.
// Check if MBB is a jump table target
const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
return llvm::is_contained(JTE.MBBs, &MBB);
};
if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
return false;
// Check if MBB contains a jump table lookup
for (const MachineInstr &MI : MBB) {
switch (MI.getOpcode()) {
case TargetOpcode::G_BRJT:
case AArch64::JumpTableDest32:
case AArch64::JumpTableDest16:
case AArch64::JumpTableDest8:
return false;
default:
continue;
}
}
// MBB isn't a special case, so it's safe to be split to the cold section.
return true;
}
std::optional<ParamLoadedValue>
AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
Register Reg) const {
const MachineFunction *MF = MI.getMF();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
switch (MI.getOpcode()) {
case AArch64::MOVZWi:
case AArch64::MOVZXi: {
// MOVZWi may be used for producing zero-extended 32-bit immediates in
// 64-bit parameters, so we need to consider super-registers.
if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
return std::nullopt;
if (!MI.getOperand(1).isImm())
return std::nullopt;
int64_t Immediate = MI.getOperand(1).getImm();
int Shift = MI.getOperand(2).getImm();
return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
nullptr);
}
case AArch64::ORRWrs:
case AArch64::ORRXrs:
return describeORRLoadedValue(MI, Reg, this, TRI);
}
return TargetInstrInfo::describeLoadedValue(MI, Reg);
}
bool AArch64InstrInfo::isExtendLikelyToBeFolded(
MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
// Anyexts are nops.
if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
return true;
Register DefReg = ExtMI.getOperand(0).getReg();
if (!MRI.hasOneNonDBGUse(DefReg))
return false;
// It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
// addressing mode.
auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
}
uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::ElementSizeMask;
}
bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
}
bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
}
unsigned int
AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
}
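/// Decide whether "reg + Offset" or "reg1 + Scale * reg2" is a legal AArch64
/// addressing mode for an access of NumBytes. For example, with NumBytes == 8
/// (a 64-bit access), the checks below give:
///   Offset -256  -> legal (fits the 9-bit signed unscaled form, e.g. LDUR)
///   Offset 32760 -> legal (4095 * 8, fits the scaled 12-bit form, e.g. LDR)
///   Offset 260   -> illegal (out of unscaled range, not a multiple of 8)
///   Scale 8, Offset 0 -> legal (register-offset form)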
bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
unsigned Scale) const {
if (Offset && Scale)
return false;
// Check Reg + Imm
if (!Scale) {
// 9-bit signed offset
if (isInt<9>(Offset))
return true;
// 12-bit unsigned offset
unsigned Shift = Log2_64(NumBytes);
if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
// Must be a multiple of NumBytes (NumBytes is a power of 2)
(Offset >> Shift) << Shift == Offset)
return true;
return false;
}
// Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
return Scale == 1 || (Scale > 0 && Scale == NumBytes);
}
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
return AArch64::BLRNoIP;
else
return AArch64::BLR;
}
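/// Emit a loop that probes the stack in ProbeSize increments until SP reaches
/// TargetReg. A sketch of the emitted sequence (mnemonics approximate):
///
///   LoopTest:
///     sub  sp, sp, #ProbeSize
///     cmp  sp, TargetReg
///     b.le LoopExit
///   LoopBody:
///     str  xzr, [sp]
///     b    LoopTest
///   LoopExit:
///     mov  sp, TargetReg
///     ldr  xzr, [sp]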
MachineBasicBlock::iterator
AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
Register TargetReg, bool FrameSetup) const {
assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
MachineBasicBlock &MBB = *MBBI->getParent();
MachineFunction &MF = *MBB.getParent();
const AArch64InstrInfo *TII =
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
DebugLoc DL = MBB.findDebugLoc(MBBI);
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
MachineBasicBlock *LoopTestMBB =
MF.CreateMachineBasicBlock(MBB.getBasicBlock());
MF.insert(MBBInsertPoint, LoopTestMBB);
MachineBasicBlock *LoopBodyMBB =
MF.CreateMachineBasicBlock(MBB.getBasicBlock());
MF.insert(MBBInsertPoint, LoopBodyMBB);
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
MF.insert(MBBInsertPoint, ExitMBB);
MachineInstr::MIFlag Flags =
FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
// LoopTest:
// SUB SP, SP, #ProbeSize
emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
// CMP SP, TargetReg
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
AArch64::XZR)
.addReg(AArch64::SP)
.addReg(TargetReg)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
.setMIFlags(Flags);
// B.<Cond> LoopExit
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::LE)
.addMBB(ExitMBB)
.setMIFlags(Flags);
// STR XZR, [SP]
BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
.addReg(AArch64::XZR)
.addReg(AArch64::SP)
.addImm(0)
.setMIFlags(Flags);
// B loop
BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
.addMBB(LoopTestMBB)
.setMIFlags(Flags);
// LoopExit:
// MOV SP, TargetReg
BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
.addReg(TargetReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
.setMIFlags(Flags);
// LDR XZR, [SP]
BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
.addReg(AArch64::XZR, RegState::Define)
.addReg(AArch64::SP)
.addImm(0)
.setMIFlags(Flags);
ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
LoopTestMBB->addSuccessor(ExitMBB);
LoopTestMBB->addSuccessor(LoopBodyMBB);
LoopBodyMBB->addSuccessor(LoopTestMBB);
MBB.addSuccessor(LoopTestMBB);
// Update liveins.
if (MF.getRegInfo().reservedRegsFrozen())
fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
return ExitMBB->begin();
}
namespace {
class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo &MRI;
/// The block of the loop
MachineBasicBlock *LoopBB;
/// The conditional branch of the loop
MachineInstr *CondBranch;
/// The compare instruction for loop control
MachineInstr *Comp;
/// The number of the operand of the loop counter value in Comp
unsigned CompCounterOprNum;
/// The instruction that updates the loop counter value
MachineInstr *Update;
/// The number of the operand of the loop counter value in Update
unsigned UpdateCounterOprNum;
/// The initial value of the loop counter
Register Init;
/// True iff Update is a predecessor of Comp
bool IsUpdatePriorComp;
/// The normalized condition used by createTripCountGreaterCondition()
SmallVector<MachineOperand, 4> Cond;
public:
AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
MachineInstr *Comp, unsigned CompCounterOprNum,
MachineInstr *Update, unsigned UpdateCounterOprNum,
Register Init, bool IsUpdatePriorComp,
const SmallVectorImpl<MachineOperand> &Cond)
: MF(Comp->getParent()->getParent()),
TII(MF->getSubtarget().getInstrInfo()),
TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
CompCounterOprNum(CompCounterOprNum), Update(Update),
UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
// Ensure the instructions for loop control are placed in stage 0.
// The predecessors of Comp are considered by the caller.
return MI == Comp;
}
std::optional<bool> createTripCountGreaterCondition(
int TC, MachineBasicBlock &MBB,
SmallVectorImpl<MachineOperand> &CondParam) override {
// A branch instruction will be inserted as "if (Cond) goto epilogue".
// Cond is normalized for such use.
// The predecessors of the branch are assumed to have already been inserted.
CondParam = Cond;
return {};
}
void createRemainingIterationsGreaterCondition(
int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
void setPreheader(MachineBasicBlock *NewPreheader) override {}
void adjustTripCount(int TripCountAdjust) override {}
void disposed() override {}
bool isMVEExpanderSupported() override { return true; }
};
} // namespace
/// Clone an instruction from MI. The register of the ReplaceOprNum-th operand
/// is replaced by ReplaceReg. The output register is newly created.
/// The other operands are unchanged from MI.
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
Register ReplaceReg, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertTo) {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
const TargetRegisterInfo *TRI =
MBB.getParent()->getSubtarget().getRegisterInfo();
MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
Register Result = 0;
for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
Result = MRI.createVirtualRegister(
MRI.getRegClass(NewMI->getOperand(0).getReg()));
NewMI->getOperand(I).setReg(Result);
} else if (I == ReplaceOprNum) {
MRI.constrainRegClass(
ReplaceReg,
TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
NewMI->getOperand(I).setReg(ReplaceReg);
}
}
MBB.insert(InsertTo, NewMI);
return Result;
}
void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
// Create and accumulate conditions for next TC iterations.
// Example:
// SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
// # iteration of the kernel
//
// # insert the following instructions
// cond = CSINCXr 0, 0, C, implicit $nzcv
// counter = ADDXri counter, 1 # clone from this->Update
// SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
// cond = CSINCXr cond, cond, C, implicit $nzcv
// ... (repeat TC times)
// SUBSXri cond, 0, implicit-def $nzcv
assert(CondBranch->getOpcode() == AArch64::Bcc);
// CondCode to exit the loop
AArch64CC::CondCode CC =
(AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
if (CondBranch->getOperand(1).getMBB() == LoopBB)
CC = AArch64CC::getInvertedCondCode(CC);
// Accumulate conditions to exit the loop
Register AccCond = AArch64::XZR;
// If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
auto AccumulateCond = [&](Register CurCond,
AArch64CC::CondCode CC) -> Register {
Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
.addReg(NewCond, RegState::Define)
.addReg(CurCond)
.addReg(CurCond)
.addImm(AArch64CC::getInvertedCondCode(CC));
return NewCond;
};
if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
// Update and Comp for I==0 already exist in MBB
// (MBB is an unrolled kernel)
Register Counter;
for (int I = 0; I <= TC; ++I) {
Register NextCounter;
if (I != 0)
NextCounter =
cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
AccCond = AccumulateCond(AccCond, CC);
if (I != TC) {
if (I == 0) {
if (Update != Comp && IsUpdatePriorComp) {
Counter =
LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
MBB.end());
} else {
// We can use the already-calculated value.
NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
}
} else if (Update != Comp) {
NextCounter =
cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
}
}
Counter = NextCounter;
}
} else {
Register Counter;
if (LastStage0Insts.empty()) {
// Use the initial counter value (testing whether the trip count is
// sufficient for the pipelined code to execute).
Counter = Init;
if (IsUpdatePriorComp)
Counter =
cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
} else {
// MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
}
for (int I = 0; I <= TC; ++I) {
Register NextCounter;
NextCounter =
cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
AccCond = AccumulateCond(AccCond, CC);
if (I != TC && Update != Comp)
NextCounter =
cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
Counter = NextCounter;
}
}
// If AccCond == 0, the remainder is greater than TC.
BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
.addReg(AArch64::XZR, RegState::Define | RegState::Dead)
.addReg(AccCond)
.addImm(0)
.addImm(0);
Cond.clear();
Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
}
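/// Given a two-input PHI "Res = PHI (Reg1, BB1), (Reg2, BB2)", return in
/// \p RegMBB the value incoming from \p MBB and in \p RegOther the other
/// incoming value.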
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
Register &RegMBB, Register &RegOther) {
assert(Phi.getNumOperands() == 5);
if (Phi.getOperand(2).getMBB() == MBB) {
RegMBB = Phi.getOperand(1).getReg();
RegOther = Phi.getOperand(3).getReg();
} else {
assert(Phi.getOperand(4).getMBB() == MBB);
RegMBB = Phi.getOperand(3).getReg();
RegOther = Phi.getOperand(1).getReg();
}
}
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
if (!Reg.isVirtual())
return false;
const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
return MRI.getVRegDef(Reg)->getParent() != BB;
}
/// If Reg is an induction variable, return true and set the update
/// instruction, the index of its counter operand, the initial value register,
/// and whether the update precedes the compare.
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
MachineInstr *&UpdateInst,
unsigned &UpdateCounterOprNum, Register &InitReg,
bool &IsUpdatePriorComp) {
// Example:
//
// Preheader:
// InitReg = ...
// LoopBB:
// Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
// Reg = COPY Reg0 ; COPY is ignored.
// Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
// ; Reg is the value calculated in the previous
// ; iteration, so IsUpdatePriorComp == false.
if (LoopBB->pred_size() != 2)
return false;
if (!Reg.isVirtual())
return false;
const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
UpdateInst = nullptr;
UpdateCounterOprNum = 0;
InitReg = 0;
IsUpdatePriorComp = true;
Register CurReg = Reg;
while (true) {
MachineInstr *Def = MRI.getVRegDef(CurReg);
if (Def->getParent() != LoopBB)
return false;
if (Def->isCopy()) {
// Ignore copy instructions unless they contain subregisters
if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
return false;
CurReg = Def->getOperand(1).getReg();
} else if (Def->isPHI()) {
if (InitReg != 0)
return false;
if (!UpdateInst)
IsUpdatePriorComp = false;
extractPhiReg(*Def, LoopBB, CurReg, InitReg);
} else {
if (UpdateInst)
return false;
switch (Def->getOpcode()) {
case AArch64::ADDSXri:
case AArch64::ADDSWri:
case AArch64::SUBSXri:
case AArch64::SUBSWri:
case AArch64::ADDXri:
case AArch64::ADDWri:
case AArch64::SUBXri:
case AArch64::SUBWri:
UpdateInst = Def;
UpdateCounterOprNum = 1;
break;
case AArch64::ADDSXrr:
case AArch64::ADDSWrr:
case AArch64::SUBSXrr:
case AArch64::SUBSWrr:
case AArch64::ADDXrr:
case AArch64::ADDWrr:
case AArch64::SUBXrr:
case AArch64::SUBWrr:
UpdateInst = Def;
if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
UpdateCounterOprNum = 1;
else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
UpdateCounterOprNum = 2;
else
return false;
break;
default:
return false;
}
CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
}
if (!CurReg.isVirtual())
return false;
if (Reg == CurReg)
break;
}
if (!UpdateInst)
return false;
return true;
}
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
// Accept loops that meet the following conditions
// * The conditional branch is BCC
// * The compare instruction is ADDS/SUBS/WHILEXX
// * One operand of the compare is an induction variable and the other is a
// loop invariant value
// * The induction variable is incremented/decremented by a single instruction
// * The loop contains no calls or instructions with unmodeled side effects
for (MachineInstr &MI : *LoopBB)
if (MI.isCall() || MI.hasUnmodeledSideEffects())
// This instruction may use NZCV, which interferes with the instruction to
// be inserted for loop control.
return nullptr;
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
return nullptr;
// Infinite loops are not supported
if (TBB == LoopBB && FBB == LoopBB)
return nullptr;
// Must be conditional branch
if (TBB != LoopBB && FBB == nullptr)
return nullptr;
assert((TBB == LoopBB || FBB == LoopBB) &&
"The Loop must be a single-basic-block loop");
MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
const TargetRegisterInfo &TRI = getRegisterInfo();
if (CondBranch->getOpcode() != AArch64::Bcc)
return nullptr;
// Normalization for createTripCountGreaterCondition()
if (TBB == LoopBB)
reverseBranchCondition(Cond);
MachineInstr *Comp = nullptr;
unsigned CompCounterOprNum = 0;
for (MachineInstr &MI : reverse(*LoopBB)) {
if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
// Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
// operands is a loop invariant value
switch (MI.getOpcode()) {
case AArch64::SUBSXri:
case AArch64::SUBSWri:
case AArch64::ADDSXri:
case AArch64::ADDSWri:
Comp = &MI;
CompCounterOprNum = 1;
break;
case AArch64::ADDSWrr:
case AArch64::ADDSXrr:
case AArch64::SUBSWrr:
case AArch64::SUBSXrr:
Comp = &MI;
break;
default:
if (isWhileOpcode(MI.getOpcode())) {
Comp = &MI;
break;
}
return nullptr;
}
if (CompCounterOprNum == 0) {
if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
CompCounterOprNum = 2;
else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
CompCounterOprNum = 1;
else
return nullptr;
}
break;
}
}
if (!Comp)
return nullptr;
MachineInstr *Update = nullptr;
Register Init;
bool IsUpdatePriorComp;
unsigned UpdateCounterOprNum;
if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
return nullptr;
return std::make_unique<AArch64PipelinerLoopInfo>(
LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
Init, IsUpdatePriorComp, Cond);
}
#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 39ae7c96cf77..a71c9453d968 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1,6050 +1,6056 @@
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUMachineFunction.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#include "AMDGPUGenCallingConv.inc"
static cl::opt<bool> AMDGPUBypassSlowDiv(
"amdgpu-bypass-slow-div",
cl::desc("Skip 64-bit divide for dynamic 32-bit values"),
cl::init(true));
// Find a larger type to do a load / store of a vector with.
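// For example, a v2i8 store (16 bits) becomes i16 and a v4i16 store
// (64 bits) becomes v2i32; sizes over 32 bits that are not a multiple of 32
// are left unchanged.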
EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
unsigned StoreSize = VT.getStoreSizeInBits();
if (StoreSize <= 32)
return EVT::getIntegerVT(Ctx, StoreSize);
if (StoreSize % 32 == 0)
return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
return VT;
}
unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
return DAG.computeKnownBits(Op).countMaxActiveBits();
}
unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
// In order for this to be a signed 24-bit value, bit 23 must be a sign bit.
return DAG.ComputeMaxSignificantBits(Op);
}
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// Always lower memset, memcpy, and memmove intrinsics to load/store
// instructions, rather than generating calls to memset, memcpy, or memmove.
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = ~0U;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = ~0U;
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = ~0U;
// Enable ganging up loads and stores in the memcpy DAG lowering.
MaxGluedStoresPerMemcpy = 16;
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::LOAD, MVT::f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
setOperationAction(ISD::LOAD, MVT::v3f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v3f32, MVT::v3i32);
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
setOperationAction(ISD::LOAD, MVT::v5f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32);
setOperationAction(ISD::LOAD, MVT::v6f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v6f32, MVT::v6i32);
setOperationAction(ISD::LOAD, MVT::v7f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v7f32, MVT::v7i32);
setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
setOperationAction(ISD::LOAD, MVT::v9f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v9f32, MVT::v9i32);
setOperationAction(ISD::LOAD, MVT::v10f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v10f32, MVT::v10i32);
setOperationAction(ISD::LOAD, MVT::v11f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v11f32, MVT::v11i32);
setOperationAction(ISD::LOAD, MVT::v12f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v12f32, MVT::v12i32);
setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
setOperationAction(ISD::LOAD, MVT::v32f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32);
setOperationAction(ISD::LOAD, MVT::i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32);
setOperationAction(ISD::LOAD, MVT::f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32);
setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v4i32);
setOperationAction(ISD::LOAD, MVT::v3i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v3i64, MVT::v6i32);
setOperationAction(ISD::LOAD, MVT::v4i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4i64, MVT::v8i32);
setOperationAction(ISD::LOAD, MVT::v3f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v3f64, MVT::v6i32);
setOperationAction(ISD::LOAD, MVT::v4f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f64, MVT::v8i32);
setOperationAction(ISD::LOAD, MVT::v8i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8i64, MVT::v16i32);
setOperationAction(ISD::LOAD, MVT::v8f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8f64, MVT::v16i32);
setOperationAction(ISD::LOAD, MVT::v16i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16i64, MVT::v32i32);
setOperationAction(ISD::LOAD, MVT::v16f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16f64, MVT::v32i32);
setOperationAction(ISD::LOAD, MVT::i128, Promote);
AddPromotedToType(ISD::LOAD, MVT::i128, MVT::v4i32);
// TODO: Would be better to consume these as directly legal.
setOperationAction(ISD::ATOMIC_LOAD, MVT::f32, Promote);
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
setOperationAction(ISD::ATOMIC_LOAD, MVT::f64, Promote);
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
setOperationAction(ISD::ATOMIC_LOAD, MVT::f16, Promote);
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
setOperationAction(ISD::ATOMIC_LOAD, MVT::bf16, Promote);
AddPromotedToType(ISD::ATOMIC_LOAD, MVT::bf16, MVT::i16);
setOperationAction(ISD::ATOMIC_STORE, MVT::f32, Promote);
AddPromotedToType(ISD::ATOMIC_STORE, MVT::f32, MVT::i32);
setOperationAction(ISD::ATOMIC_STORE, MVT::f64, Promote);
AddPromotedToType(ISD::ATOMIC_STORE, MVT::f64, MVT::i64);
setOperationAction(ISD::ATOMIC_STORE, MVT::f16, Promote);
AddPromotedToType(ISD::ATOMIC_STORE, MVT::f16, MVT::i16);
setOperationAction(ISD::ATOMIC_STORE, MVT::bf16, Promote);
AddPromotedToType(ISD::ATOMIC_STORE, MVT::bf16, MVT::i16);
// There are no 64-bit extloads. These should be done as a 32-bit extload and
// an extension to 64-bit.
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, VT,
Expand);
for (MVT VT : MVT::integer_valuetypes()) {
if (VT == MVT::i64)
continue;
for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) {
setLoadExtAction(Op, VT, MVT::i1, Promote);
setLoadExtAction(Op, VT, MVT::i8, Legal);
setLoadExtAction(Op, VT, MVT::i16, Legal);
setLoadExtAction(Op, VT, MVT::i32, Expand);
}
}
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
for (auto MemVT :
{MVT::v2i8, MVT::v4i8, MVT::v2i16, MVT::v3i16, MVT::v4i16})
setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}, VT, MemVT,
Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16bf16, Expand);
setOperationAction(ISD::STORE, MVT::f32, Promote);
AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
setOperationAction(ISD::STORE, MVT::v2f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::v3f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v3f32, MVT::v3i32);
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
setOperationAction(ISD::STORE, MVT::v5f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32);
setOperationAction(ISD::STORE, MVT::v6f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v6f32, MVT::v6i32);
setOperationAction(ISD::STORE, MVT::v7f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v7f32, MVT::v7i32);
setOperationAction(ISD::STORE, MVT::v8f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
setOperationAction(ISD::STORE, MVT::v9f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v9f32, MVT::v9i32);
setOperationAction(ISD::STORE, MVT::v10f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v10f32, MVT::v10i32);
setOperationAction(ISD::STORE, MVT::v11f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v11f32, MVT::v11i32);
setOperationAction(ISD::STORE, MVT::v12f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v12f32, MVT::v12i32);
setOperationAction(ISD::STORE, MVT::v16f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
setOperationAction(ISD::STORE, MVT::v32f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32);
setOperationAction(ISD::STORE, MVT::i64, Promote);
AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::v2i64, Promote);
AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32);
setOperationAction(ISD::STORE, MVT::f64, Promote);
AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::v2f64, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32);
setOperationAction(ISD::STORE, MVT::v3i64, Promote);
AddPromotedToType(ISD::STORE, MVT::v3i64, MVT::v6i32);
setOperationAction(ISD::STORE, MVT::v3f64, Promote);
AddPromotedToType(ISD::STORE, MVT::v3f64, MVT::v6i32);
setOperationAction(ISD::STORE, MVT::v4i64, Promote);
AddPromotedToType(ISD::STORE, MVT::v4i64, MVT::v8i32);
setOperationAction(ISD::STORE, MVT::v4f64, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f64, MVT::v8i32);
setOperationAction(ISD::STORE, MVT::v8i64, Promote);
AddPromotedToType(ISD::STORE, MVT::v8i64, MVT::v16i32);
setOperationAction(ISD::STORE, MVT::v8f64, Promote);
AddPromotedToType(ISD::STORE, MVT::v8f64, MVT::v16i32);
setOperationAction(ISD::STORE, MVT::v16i64, Promote);
AddPromotedToType(ISD::STORE, MVT::v16i64, MVT::v32i32);
setOperationAction(ISD::STORE, MVT::v16f64, Promote);
AddPromotedToType(ISD::STORE, MVT::v16f64, MVT::v32i32);
setOperationAction(ISD::STORE, MVT::i128, Promote);
AddPromotedToType(ISD::STORE, MVT::i128, MVT::v4i32);
setTruncStoreAction(MVT::i64, MVT::i1, Expand);
setTruncStoreAction(MVT::i64, MVT::i8, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand);
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Expand);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand);
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
setTruncStoreAction(MVT::v3f32, MVT::v3bf16, Expand);
setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
setTruncStoreAction(MVT::v4f32, MVT::v4bf16, Expand);
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
setTruncStoreAction(MVT::v8f32, MVT::v8bf16, Expand);
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
setTruncStoreAction(MVT::v16f32, MVT::v16bf16, Expand);
setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand);
setTruncStoreAction(MVT::v32f32, MVT::v32bf16, Expand);
setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand);
setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
setTruncStoreAction(MVT::v2f64, MVT::v2bf16, Expand);
setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand);
setTruncStoreAction(MVT::v3i32, MVT::v3i8, Expand);
setTruncStoreAction(MVT::v3i64, MVT::v3i32, Expand);
setTruncStoreAction(MVT::v3i64, MVT::v3i16, Expand);
setTruncStoreAction(MVT::v3i64, MVT::v3i8, Expand);
setTruncStoreAction(MVT::v3i64, MVT::v3i1, Expand);
setTruncStoreAction(MVT::v3f64, MVT::v3f32, Expand);
setTruncStoreAction(MVT::v3f64, MVT::v3bf16, Expand);
setTruncStoreAction(MVT::v3f64, MVT::v3f16, Expand);
setTruncStoreAction(MVT::v4i64, MVT::v4i32, Expand);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Expand);
setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8bf16, Expand);
setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand);
setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand);
setTruncStoreAction(MVT::v16f64, MVT::v16bf16, Expand);
setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand);
setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand);
setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand);
setOperationAction(ISD::Constant, {MVT::i32, MVT::i64}, Legal);
setOperationAction(ISD::ConstantFP, {MVT::f32, MVT::f64}, Legal);
setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand);
// For R600, this is totally unsupported, just custom lower to produce an
// error.
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
setOperationAction(
{ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10}, MVT::f32,
Custom);
setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
if (Subtarget->has16BitInsts())
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
else {
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
}
setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP, ISD::FEXP10}, MVT::f16,
Custom);
// FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
// scalarization code. Can be removed when IS_FPCLASS expand isn't called by
// default unless marked custom/legal.
setOperationAction(
ISD::IS_FPCLASS,
{MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32,
MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64},
Custom);
// Expand to fneg + fadd.
setOperationAction(ISD::FSUB, MVT::f64, Expand);
setOperationAction(ISD::CONCAT_VECTORS,
{MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32,
MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
Custom);
// FIXME: Why is v8f16/v8bf16 missing?
setOperationAction(
ISD::EXTRACT_SUBVECTOR,
{MVT::v2f16, MVT::v2bf16, MVT::v2i16, MVT::v4f16, MVT::v4bf16,
MVT::v4i16, MVT::v2f32, MVT::v2i32, MVT::v3f32, MVT::v3i32,
MVT::v4f32, MVT::v4i32, MVT::v5f32, MVT::v5i32, MVT::v6f32,
MVT::v6i32, MVT::v7f32, MVT::v7i32, MVT::v8f32, MVT::v8i32,
MVT::v9f32, MVT::v9i32, MVT::v10i32, MVT::v10f32, MVT::v11i32,
MVT::v11f32, MVT::v12i32, MVT::v12f32, MVT::v16f16, MVT::v16bf16,
MVT::v16i16, MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32,
MVT::v2f64, MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64,
MVT::v4i64, MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64,
MVT::v32i16, MVT::v32f16, MVT::v32bf16},
Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, {MVT::f64, MVT::f32}, Custom);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
// These should use [SU]DIVREM, so set them to expand
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT,
Expand);
// The GPU does not have a divrem instruction for signed or unsigned operands.
setOperationAction({ISD::SDIVREM, ISD::UDIVREM}, VT, Custom);
// The GPU does not implement [S|U]MUL_LOHI as a single instruction.
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
setOperationAction({ISD::BSWAP, ISD::CTTZ, ISD::CTLZ}, VT, Expand);
// AMDGPU uses ADDC/SUBC/ADDE/SUBE
setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, Legal);
}
// The hardware supports 32-bit FSHR, but not FSHL.
setOperationAction(ISD::FSHR, MVT::i32, Legal);
// The hardware supports 32-bit ROTR, but not ROTL.
setOperationAction(ISD::ROTL, {MVT::i32, MVT::i64}, Expand);
setOperationAction(ISD::ROTR, MVT::i64, Expand);
setOperationAction({ISD::MULHU, ISD::MULHS}, MVT::i16, Expand);
setOperationAction({ISD::MUL, ISD::MULHU, ISD::MULHS}, MVT::i64, Expand);
setOperationAction(
{ISD::UINT_TO_FP, ISD::SINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32,
Legal);
setOperationAction(
{ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
MVT::i64, Custom);
for (auto VT : {MVT::i8, MVT::i16})
setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, VT, Custom);
static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32,
MVT::v9i32, MVT::v10i32, MVT::v11i32, MVT::v12i32};
for (MVT VT : VectorIntTypes) {
// Expand the following operations for the current type by default.
setOperationAction({ISD::ADD, ISD::AND, ISD::FP_TO_SINT,
ISD::FP_TO_UINT, ISD::MUL, ISD::MULHU,
ISD::MULHS, ISD::OR, ISD::SHL,
ISD::SRA, ISD::SRL, ISD::ROTL,
ISD::ROTR, ISD::SUB, ISD::SINT_TO_FP,
ISD::UINT_TO_FP, ISD::SDIV, ISD::UDIV,
ISD::SREM, ISD::UREM, ISD::SMUL_LOHI,
ISD::UMUL_LOHI, ISD::SDIVREM, ISD::UDIVREM,
ISD::SELECT, ISD::VSELECT, ISD::SELECT_CC,
ISD::XOR, ISD::BSWAP, ISD::CTPOP,
ISD::CTTZ, ISD::CTLZ, ISD::VECTOR_SHUFFLE,
ISD::SETCC},
VT, Expand);
}
static const MVT::SimpleValueType FloatVectorTypes[] = {
MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32,
MVT::v9f32, MVT::v10f32, MVT::v11f32, MVT::v12f32};
for (MVT VT : FloatVectorTypes) {
setOperationAction(
{ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM,
ISD::FADD, ISD::FCEIL, ISD::FCOS,
ISD::FDIV, ISD::FEXP2, ISD::FEXP,
ISD::FEXP10, ISD::FLOG2, ISD::FREM,
ISD::FLOG, ISD::FLOG10, ISD::FPOW,
ISD::FFLOOR, ISD::FTRUNC, ISD::FMUL,
ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
ISD::FSQRT, ISD::FSIN, ISD::FSUB,
ISD::FNEG, ISD::VSELECT, ISD::SELECT_CC,
ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, ISD::SETCC,
ISD::FCANONICALIZE, ISD::FROUNDEVEN},
VT, Expand);
}
// This causes an unrolled select operation to be used rather than expansion
// with bit operations. This is in general better, but the alternative using
// BFI instructions may be better if the select sources are SGPRs.
setOperationAction(ISD::SELECT, MVT::v2f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v2f32, MVT::v2i32);
setOperationAction(ISD::SELECT, MVT::v3f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v3f32, MVT::v3i32);
setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
setOperationAction(ISD::SELECT, MVT::v5f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);
setOperationAction(ISD::SELECT, MVT::v6f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v6f32, MVT::v6i32);
setOperationAction(ISD::SELECT, MVT::v7f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v7f32, MVT::v7i32);
setOperationAction(ISD::SELECT, MVT::v9f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v9f32, MVT::v9i32);
setOperationAction(ISD::SELECT, MVT::v10f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v10f32, MVT::v10i32);
setOperationAction(ISD::SELECT, MVT::v11f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v11f32, MVT::v11i32);
setOperationAction(ISD::SELECT, MVT::v12f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32);
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
// FIXME: This is only partially true. If we have to do vector compares, any
// SGPR pair can be a condition register. If we have a uniform condition, we
// are better off doing SALU operations, where there is only one SCC. For now,
// we don't have a way of knowing during instruction selection if a condition
// will be uniform and we always use vector compares. Assume we are using
// vector compares until that is fixed.
setHasMultipleConditionRegisters(true);
setMinCmpXchgSizeInBits(32);
setSupportsUnalignedAtomics(false);
PredictableSelectIsExpensive = false;
// We want to find all load dependencies for long chains of stores to enable
// merging into very wide vectors. The problem is with vectors with > 4
// elements. MergeConsecutiveStores will attempt to merge these because x8/x16
// vectors are a legal type, even though we usually have to split the
// loads. When we can more precisely specify load legality per address
// space, we should be able to make FindBetterChain/MergeConsecutiveStores
// smarter so that they can figure out what to do in 2 iterations without all
// N > 4 stores on the same chain.
GatherAllAliasesMaxDepth = 16;
// memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry
// about these during lowering.
MaxStoresPerMemcpy = 0xffffffff;
MaxStoresPerMemmove = 0xffffffff;
MaxStoresPerMemset = 0xffffffff;
// The expansion for 64-bit division is enormous.
if (AMDGPUBypassSlowDiv)
addBypassSlowDiv(64, 32);
setTargetDAGCombine({ISD::BITCAST, ISD::SHL,
ISD::SRA, ISD::SRL,
ISD::TRUNCATE, ISD::MUL,
ISD::SMUL_LOHI, ISD::UMUL_LOHI,
ISD::MULHU, ISD::MULHS,
ISD::SELECT, ISD::SELECT_CC,
ISD::STORE, ISD::FADD,
ISD::FSUB, ISD::FNEG,
ISD::FABS, ISD::AssertZext,
ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN});
setMaxAtomicSizeInBitsSupported(64);
setMaxDivRemBitWidthSupported(64);
setMaxLargeFPConvertBitWidthSupported(64);
}
bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
if (getTargetMachine().Options.NoSignedZerosFPMath)
return true;
const auto Flags = Op.getNode()->getFlags();
if (Flags.hasNoSignedZeros())
return true;
return false;
}
//===----------------------------------------------------------------------===//
// Target Information
//===----------------------------------------------------------------------===//
LLVM_READNONE
static bool fnegFoldsIntoOpcode(unsigned Opc) {
switch (Opc) {
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FMA:
case ISD::FMAD:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
case ISD::SELECT:
case ISD::FSIN:
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FROUNDEVEN:
case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::SIN_HW:
case AMDGPUISD::FMUL_LEGACY:
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMED3:
// TODO: handle llvm.amdgcn.fma.legacy
return true;
case ISD::BITCAST:
llvm_unreachable("bitcast is special cased");
default:
return false;
}
}
static bool fnegFoldsIntoOp(const SDNode *N) {
unsigned Opc = N->getOpcode();
if (Opc == ISD::BITCAST) {
// TODO: Is there a benefit to checking the conditions performFNegCombine
// does? We don't for the other cases.
SDValue BCSrc = N->getOperand(0);
if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) {
return BCSrc.getNumOperands() == 2 &&
BCSrc.getOperand(1).getValueSizeInBits() == 32;
}
return BCSrc.getOpcode() == ISD::SELECT && BCSrc.getValueType() == MVT::f32;
}
return fnegFoldsIntoOpcode(Opc);
}
/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
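/// For example, a 3-operand FMA must use VOP3, as must any f64 operation.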
LLVM_READONLY
static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
return (N->getNumOperands() > 2 && N->getOpcode() != ISD::SELECT) ||
VT == MVT::f64;
}
/// Return true if v_cndmask_b32 will support fabs/fneg source modifiers for the
/// type for ISD::SELECT.
LLVM_READONLY
static bool selectSupportsSourceMods(const SDNode *N) {
// TODO: Only applies if select will be vector
return N->getValueType(0) == MVT::f32;
}
// Most FP instructions support source modifiers, but this could be refined
// slightly.
LLVM_READONLY
static bool hasSourceMods(const SDNode *N) {
if (isa<MemSDNode>(N))
return false;
switch (N->getOpcode()) {
case ISD::CopyToReg:
case ISD::FDIV:
case ISD::FREM:
case ISD::INLINEASM:
case ISD::INLINEASM_BR:
case AMDGPUISD::DIV_SCALE:
case ISD::INTRINSIC_W_CHAIN:
// TODO: Should really be looking at the users of the bitcast. These are
// problematic because bitcasts are used to legalize all stores to integer
// types.
case ISD::BITCAST:
return false;
case ISD::INTRINSIC_WO_CHAIN: {
switch (N->getConstantOperandVal(0)) {
case Intrinsic::amdgcn_interp_p1:
case Intrinsic::amdgcn_interp_p2:
case Intrinsic::amdgcn_interp_mov:
case Intrinsic::amdgcn_interp_p1_f16:
case Intrinsic::amdgcn_interp_p2_f16:
return false;
default:
return true;
}
}
case ISD::SELECT:
return selectSupportsSourceMods(N);
default:
return true;
}
}
bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N,
unsigned CostThreshold) {
// Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
// it is truly free to use a source modifier for them. If there are multiple
// users and adding a source modifier would force each of them into a VOP3
// encoding, there will be a code size increase. Try to avoid increasing code
// size unless we know it will save on the instruction count.
unsigned NumMayIncreaseSize = 0;
MVT VT = N->getValueType(0).getScalarType().getSimpleVT();
assert(!N->use_empty());
// XXX - Should this limit number of uses to check?
for (const SDNode *U : N->uses()) {
if (!hasSourceMods(U))
return false;
if (!opMustUseVOP3Encoding(U, VT)) {
if (++NumMayIncreaseSize > CostThreshold)
return false;
}
}
return true;
}
EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
assert(!VT.isVector() && "only scalar expected");
// Round to the next multiple of 32-bits.
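// For example, an i40 return value is extended to i64 (32 * ceil(40 / 32)).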
unsigned Size = VT.getSizeInBits();
if (Size <= 32)
return MVT::i32;
return EVT::getIntegerVT(Context, 32 * ((Size + 31) / 32));
}
MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
return MVT::i32;
}
bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
return true;
}
// The backend supports 32 and 64 bit floating point immediates.
// FIXME: Why are we reporting vectors of FP immediates as legal?
bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
EVT ScalarVT = VT.getScalarType();
return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
(ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
}
// We don't want to shrink f64 / f32 constants.
bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
EVT ScalarVT = VT.getScalarType();
return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
}
bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
// TODO: This may be worth removing. Check regression tests for diffs.
if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
return false;
unsigned NewSize = NewVT.getStoreSizeInBits();
// If we are reducing to a 32-bit load or a smaller multi-dword load,
// this is always better.
if (NewSize >= 32)
return true;
EVT OldVT = N->getValueType(0);
unsigned OldSize = OldVT.getStoreSizeInBits();
MemSDNode *MN = cast<MemSDNode>(N);
unsigned AS = MN->getAddressSpace();
// Do not shrink an aligned scalar load to sub-dword.
// Scalar engine cannot do sub-dword loads.
// TODO: Update this for GFX12 which does have scalar sub-dword loads.
if (OldSize >= 32 && NewSize < 32 && MN->getAlign() >= Align(4) &&
(AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
(isa<LoadSDNode>(N) && AS == AMDGPUAS::GLOBAL_ADDRESS &&
MN->isInvariant())) &&
AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand()))
return false;
// Don't produce extloads from sub 32-bit types. SI doesn't have scalar
// extloads, so doing one requires using a buffer_load. In cases where we
// still couldn't use a scalar load, using the wider load shouldn't really
// hurt anything.
// If the old size already had to be an extload, there's no harm in continuing
// to reduce the width.
return (OldSize < 32);
}
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
const SelectionDAG &DAG,
const MachineMemOperand &MMO) const {
assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());
if (LoadTy.getScalarType() == MVT::i32)
return false;
unsigned LScalarSize = LoadTy.getScalarSizeInBits();
unsigned CastScalarSize = CastTy.getScalarSizeInBits();
if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
return false;
unsigned Fast = 0;
return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
CastTy, MMO, &Fast) &&
Fast;
}
// SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
// profitable with the expansion for 64-bit since it's generally good to
// speculate things.
bool AMDGPUTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
return true;
}
bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
return true;
}
bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
switch (N->getOpcode()) {
case ISD::EntryToken:
case ISD::TokenFactor:
return true;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrID = N->getConstantOperandVal(0);
return AMDGPU::isIntrinsicAlwaysUniform(IntrID);
}
case ISD::LOAD:
if (cast<LoadSDNode>(N)->getMemOperand()->getAddrSpace() ==
AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return true;
return false;
case AMDGPUISD::SETCC: // ballot-style instruction
return true;
}
return false;
}
SDValue AMDGPUTargetLowering::getNegatedExpression(
SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize,
NegatibleCost &Cost, unsigned Depth) const {
switch (Op.getOpcode()) {
case ISD::FMA:
case ISD::FMAD: {
// Negating a fma is not free if it has users without source mods.
if (!allUsesHaveSourceMods(Op.getNode()))
return SDValue();
break;
}
case AMDGPUISD::RCP: {
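// The negation can be folded into RCP's input, since -(1/x) == 1/(-x);
// try to negate the source instead.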
SDValue Src = Op.getOperand(0);
EVT VT = Op.getValueType();
SDLoc SL(Op);
SDValue NegSrc = getNegatedExpression(Src, DAG, LegalOperations,
ForCodeSize, Cost, Depth + 1);
if (NegSrc)
return DAG.getNode(AMDGPUISD::RCP, SL, VT, NegSrc, Op->getFlags());
return SDValue();
}
default:
break;
}
return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
ForCodeSize, Cost, Depth);
}
//===---------------------------------------------------------------------===//
// Target Properties
//===---------------------------------------------------------------------===//
bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
assert(VT.isFloatingPoint());
// Packed operations do not have a fabs modifier.
return VT == MVT::f32 || VT == MVT::f64 ||
(Subtarget->has16BitInsts() && (VT == MVT::f16 || VT == MVT::bf16));
}
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
assert(VT.isFloatingPoint());
// Report this based on the end legalized type.
VT = VT.getScalarType();
return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16 || VT == MVT::bf16;
}
bool AMDGPUTargetLowering::storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
unsigned NumElem,
unsigned AS) const {
return true;
}
bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const {
// There are few operations which truly have vector input operands. Any vector
// operation is going to involve operations on each component, and a
// build_vector will be a copy per element, so it always makes sense to use a
// build_vector input in place of the extracted element to avoid a copy into a
// super register.
//
// We should probably only do this if all users are extracts only, but this
// should be the common case.
return true;
}
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
// Truncate is just accessing a subregister.
unsigned SrcSize = Source.getSizeInBits();
unsigned DestSize = Dest.getSizeInBits();
return DestSize < SrcSize && DestSize % 32 == 0;
}
bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
// Truncate is just accessing a subregister.
unsigned SrcSize = Source->getScalarSizeInBits();
unsigned DestSize = Dest->getScalarSizeInBits();
if (DestSize == 16 && Subtarget->has16BitInsts())
return SrcSize >= 32;
return DestSize < SrcSize && DestSize % 32 == 0;
}
bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
unsigned SrcSize = Src->getScalarSizeInBits();
unsigned DestSize = Dest->getScalarSizeInBits();
if (SrcSize == 16 && Subtarget->has16BitInsts())
return DestSize >= 32;
return SrcSize == 32 && DestSize == 64;
}
bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
// Any register load of a 64-bit value really requires 2 32-bit moves. For all
// practical purposes, the extra mov 0 to load a 64-bit is free. As used,
// this will enable reducing 64-bit operations to 32-bit, which is always
// good.
if (Src == MVT::i16)
return Dest == MVT::i32 || Dest == MVT::i64;
return Src == MVT::i32 && Dest == MVT::i64;
}
bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// There aren't really 64-bit registers, but pairs of 32-bit ones and only a
// limited number of native 64-bit operations. Shrinking an operation to fit
// in a single 32-bit register should always be helpful. As currently used,
// this is much less general than the name suggests, and is only used in
// places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
// not profitable, and may actually be harmful.
return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
}
bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
const SDNode* N, CombineLevel Level) const {
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) &&
"Expected shift op");
// Always commute pre-type legalization and right shifts.
// We're looking for shl(or(x,y),z) patterns.
if (Level < CombineLevel::AfterLegalizeTypes ||
N->getOpcode() != ISD::SHL || N->getOperand(0).getOpcode() != ISD::OR)
return true;
// If the only user is an i32 right-shift, don't destroy a BFE pattern.
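// (A BFE pattern is e.g. (srl (shl x, c1), c2), which can select to a
// single bitfield-extract instruction.)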
if (N->getValueType(0) == MVT::i32 && N->use_size() == 1 &&
(N->use_begin()->getOpcode() == ISD::SRA ||
N->use_begin()->getOpcode() == ISD::SRL))
return false;
// Don't destroy or(shl(load_zext(),c), load_zext()) patterns.
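// For example, (or (shl (zextload i8), 8), (zextload i8)) assembles a
// wider value from adjacent narrow loads; commuting the shift with the or
// would break that match.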
auto IsShiftAndLoad = [](SDValue LHS, SDValue RHS) {
if (LHS.getOpcode() != ISD::SHL)
return false;
auto *RHSLd = dyn_cast<LoadSDNode>(RHS);
auto *LHS0 = dyn_cast<LoadSDNode>(LHS.getOperand(0));
auto *LHS1 = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
return LHS0 && LHS1 && RHSLd && LHS0->getExtensionType() == ISD::ZEXTLOAD &&
LHS1->getAPIntValue() == LHS0->getMemoryVT().getScalarSizeInBits() &&
RHSLd->getExtensionType() == ISD::ZEXTLOAD;
};
SDValue LHS = N->getOperand(0).getOperand(0);
SDValue RHS = N->getOperand(0).getOperand(1);
return !(IsShiftAndLoad(LHS, RHS) || IsShiftAndLoad(RHS, LHS));
}
//===---------------------------------------------------------------------===//
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//
CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) {
switch (CC) {
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_HS:
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_LS:
return CC_AMDGPU;
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
return CC_AMDGPU_CS_CHAIN;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
return CC_AMDGPU_Func;
case CallingConv::AMDGPU_Gfx:
return CC_SI_Gfx;
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
default:
report_fatal_error("Unsupported calling convention for call");
}
}
CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
bool IsVarArg) {
switch (CC) {
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
llvm_unreachable("kernels should not be handled here");
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_CS_Chain:
case CallingConv::AMDGPU_CS_ChainPreserve:
case CallingConv::AMDGPU_HS:
case CallingConv::AMDGPU_ES:
case CallingConv::AMDGPU_LS:
return RetCC_SI_Shader;
case CallingConv::AMDGPU_Gfx:
return RetCC_SI_Gfx;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
return RetCC_AMDGPU_Func;
default:
report_fatal_error("Unsupported calling convention.");
}
}
/// The SelectionDAGBuilder will automatically promote function arguments
/// with illegal types. However, this does not work for the AMDGPU targets
/// since the function arguments are stored in memory as these illegal types.
/// In order to handle this properly we need to get the original types sizes
/// from the LLVM IR Function and fix up the ISD::InputArg values before
/// passing them to AnalyzeFormalArguments()
/// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
/// input values across multiple registers. Each item in the Ins array
/// represents a single value that will be stored in registers. Ins[x].VT is
/// the value type of the value that will be stored in the register, so
/// whatever SDNode we lower the argument to needs to be this type.
///
/// In order to correctly lower the arguments we need to know the size of each
/// argument. Since Ins[x].VT gives us the size of the register that will
/// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
/// for the original function argument so that we can deduce the correct memory
/// type to use for Ins[x]. In most cases the correct memory type will be
/// Ins[x].ArgVT. However, this will not always be the case. If, for example,
/// we have a kernel argument of type v8i8, this argument will be split into
/// 8 parts and each part will be represented by its own item in the Ins array.
/// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of
/// the argument before it was split. From this, we deduce that the memory type
/// for each individual part is i8. We pass the memory type as LocVT to the
/// calling convention analysis function and the register type (Ins[x].VT) as
/// the ValVT.
void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const {
const MachineFunction &MF = State.getMachineFunction();
const Function &Fn = MF.getFunction();
LLVMContext &Ctx = Fn.getParent()->getContext();
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset();
CallingConv::ID CC = Fn.getCallingConv();
Align MaxAlign = Align(1);
uint64_t ExplicitArgOffset = 0;
const DataLayout &DL = Fn.getDataLayout();
unsigned InIndex = 0;
for (const Argument &Arg : Fn.args()) {
const bool IsByRef = Arg.hasByRefAttr();
Type *BaseArgTy = Arg.getType();
Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;
Align Alignment = DL.getValueOrABITypeAlignment(
IsByRef ? Arg.getParamAlign() : std::nullopt, MemArgTy);
MaxAlign = std::max(Alignment, MaxAlign);
uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy);
uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize;
// We're basically throwing away everything passed into us and starting over
// to get accurate in-memory offsets. The "PartOffset" is completely useless
// to us as computed in Ins.
//
// We also need to figure out what type legalization is trying to do to get
// the correct memory offsets.
SmallVector<EVT, 16> ValueVTs;
SmallVector<uint64_t, 16> Offsets;
ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
uint64_t BasePartOffset = Offsets[Value];
EVT ArgVT = ValueVTs[Value];
EVT MemVT = ArgVT;
MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT);
unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT);
if (NumRegs == 1) {
// This argument is not split, so the IR type is the memory type.
if (ArgVT.isExtended()) {
// We have an extended type, like i24, so we should just use the
// register type.
MemVT = RegisterVT;
} else {
MemVT = ArgVT;
}
} else if (ArgVT.isVector() && RegisterVT.isVector() &&
ArgVT.getScalarType() == RegisterVT.getScalarType()) {
assert(ArgVT.getVectorNumElements() > RegisterVT.getVectorNumElements());
// We have a vector value which has been split into a vector with
// the same scalar type, but fewer elements. This should handle
// all the floating-point vector types.
MemVT = RegisterVT;
} else if (ArgVT.isVector() &&
ArgVT.getVectorNumElements() == NumRegs) {
// This arg has been split so that each element is stored in a separate
// register.
MemVT = ArgVT.getScalarType();
} else if (ArgVT.isExtended()) {
// We have an extended type, like i65.
MemVT = RegisterVT;
} else {
unsigned MemoryBits = ArgVT.getStoreSizeInBits() / NumRegs;
assert(ArgVT.getStoreSizeInBits() % NumRegs == 0);
if (RegisterVT.isInteger()) {
MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
} else if (RegisterVT.isVector()) {
assert(!RegisterVT.getScalarType().isFloatingPoint());
unsigned NumElements = RegisterVT.getVectorNumElements();
assert(MemoryBits % NumElements == 0);
// This vector type has been split into another vector type with
// a different element size.
EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
MemoryBits / NumElements);
MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
} else {
llvm_unreachable("cannot deduce memory type.");
}
}
// Convert one element vectors to scalar.
if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
MemVT = MemVT.getScalarType();
// Round up vec3/vec5 argument.
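// For example, a v3i32 part is widened to v4i32 here.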
if (MemVT.isVector() && !MemVT.isPow2VectorType()) {
assert(MemVT.getVectorNumElements() == 3 ||
MemVT.getVectorNumElements() == 5 ||
(MemVT.getVectorNumElements() >= 9 &&
MemVT.getVectorNumElements() <= 12));
MemVT = MemVT.getPow2VectorType(State.getContext());
} else if (!MemVT.isSimple() && !MemVT.isVector()) {
MemVT = MemVT.getRoundIntegerType(State.getContext());
}
unsigned PartOffset = 0;
for (unsigned i = 0; i != NumRegs; ++i) {
State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT,
BasePartOffset + PartOffset,
MemVT.getSimpleVT(),
CCValAssign::Full));
PartOffset += MemVT.getStoreSize();
}
}
}
}
SDValue AMDGPUTargetLowering::LowerReturn(
SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
// FIXME: Fails for r600 tests
//assert(!isVarArg && Outs.empty() && OutVals.empty() &&
// "wave terminate should not have return values");
return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain);
}
//===---------------------------------------------------------------------===//
// Target specific lowering
//===---------------------------------------------------------------------===//
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) {
return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg);
}
CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
bool IsVarArg) {
return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg);
}
SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
MachineFrameInfo &MFI,
int ClobberedFI) const {
SmallVector<SDValue, 8> ArgChains;
int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
// Include the original chain at the beginning of the list. When this is
// used by target LowerCall hooks, this helps legalize find the
// CALLSEQ_BEGIN node.
ArgChains.push_back(Chain);
// Add a chain value for each stack argument whose byte range overlaps the
// clobbered object.
for (SDNode *U : DAG.getEntryNode().getNode()->uses()) {
if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) {
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) {
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
int64_t InLastByte = InFirstByte;
InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
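// Two byte ranges overlap exactly when one contains the start of the
// other, which is what the disjunction below checks.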
if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
(FirstByte <= InFirstByte && InFirstByte <= LastByte))
ArgChains.push_back(SDValue(L, 1));
}
}
}
}
// Build a tokenfactor for all the chains.
return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
}
SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals,
StringRef Reason) const {
SDValue Callee = CLI.Callee;
SelectionDAG &DAG = CLI.DAG;
const Function &Fn = DAG.getMachineFunction().getFunction();
StringRef FuncName("<unknown>");
if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
FuncName = G->getSymbol();
else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
FuncName = G->getGlobal()->getName();
DiagnosticInfoUnsupported NoCalls(
Fn, Reason + FuncName, CLI.DL.getDebugLoc());
DAG.getContext()->diagnose(NoCalls);
if (!CLI.IsTailCall) {
for (ISD::InputArg &Arg : CLI.Ins)
InVals.push_back(DAG.getUNDEF(Arg.VT));
}
return DAG.getEntryNode();
}
SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
return lowerUnhandledCall(CLI, InVals, "unsupported call to function ");
}
SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
const Function &Fn = DAG.getMachineFunction().getFunction();
DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca",
SDLoc(Op).getDebugLoc());
DAG.getContext()->diagnose(NoDynamicAlloca);
auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
return DAG.getMergeValues(Ops, SDLoc());
}
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
Op->print(errs(), &DAG);
llvm_unreachable("Custom lowering code for this "
"instruction is not implemented yet!");
break;
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
case ISD::FREM: return LowerFREM(Op, DAG);
case ISD::FCEIL: return LowerFCEIL(Op, DAG);
case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
case ISD::FRINT: return LowerFRINT(Op, DAG);
case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
case ISD::FROUNDEVEN:
return LowerFROUNDEVEN(Op, DAG);
case ISD::FROUND: return LowerFROUND(Op, DAG);
case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::FLOG2:
return LowerFLOG2(Op, DAG);
case ISD::FLOG:
case ISD::FLOG10:
return LowerFLOGCommon(Op, DAG);
case ISD::FEXP:
case ISD::FEXP10:
return lowerFEXP(Op, DAG);
case ISD::FEXP2:
return lowerFEXP2(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG);
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
return LowerCTLZ_CTTZ(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
return Op;
}
void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
switch (N->getOpcode()) {
case ISD::SIGN_EXTEND_INREG:
// Different parts of legalization seem to interpret which type of
// sign_extend_inreg is the one to check for custom lowering. The extended
// from type is what really matters, but some places check for custom
// lowering of the result type. This results in trying to use
// ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
// nothing here and let the illegal result integer be handled normally.
return;
case ISD::FLOG2:
if (SDValue Lowered = LowerFLOG2(SDValue(N, 0), DAG))
Results.push_back(Lowered);
return;
case ISD::FLOG:
case ISD::FLOG10:
if (SDValue Lowered = LowerFLOGCommon(SDValue(N, 0), DAG))
Results.push_back(Lowered);
return;
case ISD::FEXP2:
if (SDValue Lowered = lowerFEXP2(SDValue(N, 0), DAG))
Results.push_back(Lowered);
return;
case ISD::FEXP:
case ISD::FEXP10:
if (SDValue Lowered = lowerFEXP(SDValue(N, 0), DAG))
Results.push_back(Lowered);
return;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
if (auto Lowered = lowerCTLZResults(SDValue(N, 0u), DAG))
Results.push_back(Lowered);
return;
default:
return;
}
}
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const {
const DataLayout &DL = DAG.getDataLayout();
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
if (!MFI->isModuleEntryFunction()) {
if (std::optional<uint32_t> Address =
AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
return DAG.getConstant(*Address, SDLoc(Op), Op.getValueType());
}
}
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isModuleEntryFunction() &&
GV->getName() != "llvm.amdgcn.module.lds") {
SDLoc DL(Op);
const Function &Fn = DAG.getMachineFunction().getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
Fn, "local memory global used by non-kernel function",
DL.getDebugLoc(), DS_Warning);
DAG.getContext()->diagnose(BadLDSDecl);
      // We currently don't have a way to correctly allocate LDS objects that
      // aren't directly associated with a kernel. We do force inlining of
      // functions that use local objects. However, if these dead functions
      // are not eliminated, we don't want a compile-time error. Just emit a
      // warning and a trap, since there should be no callable path here.
SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode());
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
Trap, DAG.getRoot());
DAG.setRoot(OutputChain);
return DAG.getUNDEF(Op.getValueType());
}
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
"Do not know what to do with an non-zero offset");
// TODO: We could emit code to handle the initialization somewhere.
// We ignore the initializer for now and legalize it to allow selection.
    // The initializer will be diagnosed during assembly emission anyway.
unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
}
return SDValue();
}
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
SmallVector<SDValue, 8> Args;
SDLoc SL(Op);
EVT VT = Op.getValueType();
if (VT.getVectorElementType().getSizeInBits() < 32) {
unsigned OpBitSize = Op.getOperand(0).getValueType().getSizeInBits();
if (OpBitSize >= 32 && OpBitSize % 32 == 0) {
unsigned NewNumElt = OpBitSize / 32;
EVT NewEltVT = (NewNumElt == 1) ? MVT::i32
: EVT::getVectorVT(*DAG.getContext(),
MVT::i32, NewNumElt);
for (const SDUse &U : Op->ops()) {
SDValue In = U.get();
SDValue NewIn = DAG.getNode(ISD::BITCAST, SL, NewEltVT, In);
if (NewNumElt > 1)
DAG.ExtractVectorElements(NewIn, Args);
else
Args.push_back(NewIn);
}
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
NewNumElt * Op.getNumOperands());
SDValue BV = DAG.getBuildVector(NewVT, SL, Args);
return DAG.getNode(ISD::BITCAST, SL, VT, BV);
}
}
for (const SDUse &U : Op->ops())
DAG.ExtractVectorElements(U.get(), Args);
return DAG.getBuildVector(Op.getValueType(), SL, Args);
}
SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
SmallVector<SDValue, 8> Args;
unsigned Start = Op.getConstantOperandVal(1);
EVT VT = Op.getValueType();
EVT SrcVT = Op.getOperand(0).getValueType();
if (VT.getScalarSizeInBits() == 16 && Start % 2 == 0) {
unsigned NumElt = VT.getVectorNumElements();
unsigned NumSrcElt = SrcVT.getVectorNumElements();
assert(NumElt % 2 == 0 && NumSrcElt % 2 == 0 && "expect legal types");
// Extract 32-bit registers at a time.
EVT NewSrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumSrcElt / 2);
EVT NewVT = NumElt == 2
? MVT::i32
: EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElt / 2);
SDValue Tmp = DAG.getNode(ISD::BITCAST, SL, NewSrcVT, Op.getOperand(0));
DAG.ExtractVectorElements(Tmp, Args, Start / 2, NumElt / 2);
if (NumElt == 2)
Tmp = Args[0];
else
Tmp = DAG.getBuildVector(NewVT, SL, Args);
return DAG.getNode(ISD::BITCAST, SL, VT, Tmp);
}
DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
VT.getVectorNumElements());
return DAG.getBuildVector(Op.getValueType(), SL, Args);
}
// TODO: Handle fabs too
static SDValue peekFNeg(SDValue Val) {
if (Val.getOpcode() == ISD::FNEG)
return Val.getOperand(0);
return Val;
}
static SDValue peekFPSignOps(SDValue Val) {
if (Val.getOpcode() == ISD::FNEG)
Val = Val.getOperand(0);
if (Val.getOpcode() == ISD::FABS)
Val = Val.getOperand(0);
if (Val.getOpcode() == ISD::FCOPYSIGN)
Val = Val.getOperand(0);
return Val;
}
SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
SDValue False, SDValue CC, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
switch (CCOpcode) {
case ISD::SETOEQ:
case ISD::SETONE:
case ISD::SETUNE:
case ISD::SETNE:
case ISD::SETUEQ:
case ISD::SETEQ:
case ISD::SETFALSE:
case ISD::SETFALSE2:
case ISD::SETTRUE:
case ISD::SETTRUE2:
case ISD::SETUO:
case ISD::SETO:
break;
case ISD::SETULE:
case ISD::SETULT: {
if (LHS == True)
return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
}
case ISD::SETOLE:
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT: {
// Ordered. Assume ordered for undefined.
// Only do this after legalization to avoid interfering with other combines
// which might occur.
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
!DCI.isCalledByLegalizer())
return SDValue();
    // We need to permute the operands to get the correct NaN behavior. When
    // the compare fails on a NaN input, the select returns its second
    // operand, so order the min/max operands to reproduce that with the
    // compare the hardware actually performs.
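    // For example, select (setolt lhs, rhs), lhs, rhs becomes
    // fmin_legacy lhs, rhs: assuming the legacy min semantics
    // x < y ? x : y, both forms yield rhs when either input is NaN.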
if (LHS == True)
return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
}
case ISD::SETUGE:
case ISD::SETUGT: {
if (LHS == True)
return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
}
case ISD::SETGT:
case ISD::SETGE:
case ISD::SETOGE:
case ISD::SETOGT: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
!DCI.isCalledByLegalizer())
return SDValue();
if (LHS == True)
return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
}
case ISD::SETCC_INVALID:
llvm_unreachable("Invalid setcc condcode!");
}
return SDValue();
}
/// Generate Min/Max node
SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
SDValue LHS, SDValue RHS,
SDValue True, SDValue False,
SDValue CC,
DAGCombinerInfo &DCI) const {
if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
return combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, True, False, CC, DCI);
SelectionDAG &DAG = DCI.DAG;
// If we can't directly match this, try to see if we can fold an fneg to
// match.
ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False);
SDValue NegTrue = peekFNeg(True);
// Undo the combine foldFreeOpFromSelect does if it helps us match the
// fmin/fmax.
//
// select (fcmp olt (lhs, K)), (fneg lhs), -K
// -> fneg (fmin_legacy lhs, K)
//
// TODO: Use getNegatedExpression
if (LHS == NegTrue && CFalse && CRHS) {
APFloat NegRHS = neg(CRHS->getValueAPF());
if (NegRHS == CFalse->getValueAPF()) {
SDValue Combined =
combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, NegTrue, False, CC, DCI);
if (Combined)
return DAG.getNode(ISD::FNEG, DL, VT, Combined);
return SDValue();
}
}
return SDValue();
}
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
const SDValue One = DAG.getConstant(1, SL, MVT::i32);
SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
return std::pair(Lo, Hi);
}
SDValue AMDGPUTargetLowering::getLoHalf64(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
}
SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
const SDValue One = DAG.getConstant(1, SL, MVT::i32);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
}
// Split a vector type into two parts. The first part is a power-of-two
// vector. The second part is whatever is left over, and is a scalar if it
// would otherwise be a 1-vector.
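// For example (illustrative): v3i32 splits into (v2i32, i32), and v6i16
// splits into (v4i16, v2i16).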
std::pair<EVT, EVT>
AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const {
EVT LoVT, HiVT;
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2);
LoVT = EVT::getVectorVT(*DAG.getContext(), EltVT, LoNumElts);
HiVT = NumElts - LoNumElts == 1
? EltVT
: EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts - LoNumElts);
return std::pair(LoVT, HiVT);
}
// Split a vector value into two parts of types LoVT and HiVT. HiVT could be
// scalar.
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL,
const EVT &LoVT, const EVT &HiVT,
SelectionDAG &DAG) const {
assert(LoVT.getVectorNumElements() +
(HiVT.isVector() ? HiVT.getVectorNumElements() : 1) <=
N.getValueType().getVectorNumElements() &&
"More vector elements requested than available!");
SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
DAG.getVectorIdxConstant(0, DL));
SDValue Hi = DAG.getNode(
HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL,
HiVT, N, DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), DL));
return std::pair(Lo, Hi);
}
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT VT = Op.getValueType();
SDLoc SL(Op);
  // If this is a 2-element vector, we really want to scalarize and not create
  // weird 1-element vectors.
if (VT.getVectorNumElements() == 2) {
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG);
return DAG.getMergeValues(Ops, SL);
}
SDValue BasePtr = Load->getBasePtr();
EVT MemVT = Load->getMemoryVT();
const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
EVT LoVT, HiVT;
EVT LoMemVT, HiMemVT;
SDValue Lo, Hi;
std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG);
unsigned Size = LoMemVT.getStoreSize();
Align BaseAlign = Load->getAlign();
Align HiAlign = commonAlignment(BaseAlign, Size);
SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
Load->getChain(), BasePtr, SrcValue, LoMemVT,
BaseAlign, Load->getMemOperand()->getFlags());
SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Size));
SDValue HiLoad =
DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(),
HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
SDValue Join;
if (LoVT == HiVT) {
    // This is the case where the vector length is a power of two, so it was
    // split evenly.
Join = DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad);
} else {
Join = DAG.getNode(ISD::INSERT_SUBVECTOR, SL, VT, DAG.getUNDEF(VT), LoLoad,
DAG.getVectorIdxConstant(0, SL));
Join = DAG.getNode(
HiVT.isVector() ? ISD::INSERT_SUBVECTOR : ISD::INSERT_VECTOR_ELT, SL,
VT, Join, HiLoad,
DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), SL));
}
SDValue Ops[] = {Join, DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
LoLoad.getValue(1), HiLoad.getValue(1))};
return DAG.getMergeValues(Ops, SL);
}
SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT VT = Op.getValueType();
SDValue BasePtr = Load->getBasePtr();
EVT MemVT = Load->getMemoryVT();
SDLoc SL(Op);
const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
Align BaseAlign = Load->getAlign();
unsigned NumElements = MemVT.getVectorNumElements();
// Widen from vec3 to vec4 when the load is at least 8-byte aligned
// or 16-byte fully dereferenceable. Otherwise, split the vector load.
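  // Widening performs a single 4-element wide load and then drops the extra
  // lane with an EXTRACT_SUBVECTOR; the checks below ensure the fourth
  // element can be read safely.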
if (NumElements != 3 ||
(BaseAlign < Align(8) &&
!SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout())))
return SplitVectorLoad(Op, DAG);
assert(NumElements == 3);
EVT WideVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
EVT WideMemVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), 4);
SDValue WideLoad = DAG.getExtLoad(
Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue,
WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());
return DAG.getMergeValues(
{DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad,
DAG.getVectorIdxConstant(0, SL)),
WideLoad.getValue(1)},
SL);
}
SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
SDValue Val = Store->getValue();
EVT VT = Val.getValueType();
  // If this is a 2-element vector, we really want to scalarize and not create
  // weird 1-element vectors.
if (VT.getVectorNumElements() == 2)
return scalarizeVectorStore(Store, DAG);
EVT MemVT = Store->getMemoryVT();
SDValue Chain = Store->getChain();
SDValue BasePtr = Store->getBasePtr();
SDLoc SL(Op);
EVT LoVT, HiVT;
EVT LoMemVT, HiMemVT;
SDValue Lo, Hi;
std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG);
SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize());
const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();
Align BaseAlign = Store->getAlign();
unsigned Size = LoMemVT.getStoreSize();
Align HiAlign = commonAlignment(BaseAlign, Size);
SDValue LoStore =
DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
Store->getMemOperand()->getFlags());
SDValue HiStore =
DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size),
HiMemVT, HiAlign, Store->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
}
// This is a shortcut for integer division because we have fast i32<->f32
// conversions, and fast f32 reciprocal instructions. The 24-bit significand
// of an f32 is enough to represent a 24-bit signed integer exactly.
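// Roughly: with at least 9 sign bits on each 32-bit operand, the magnitudes
// fit in 24 bits, so the integer<->float conversions below are exact and a
// single correction term (jq) recovers the exact quotient from the rounded
// reciprocal multiply.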
SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
bool Sign) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
MVT IntVT = MVT::i32;
MVT FltVT = MVT::f32;
unsigned LHSSignBits = DAG.ComputeNumSignBits(LHS);
if (LHSSignBits < 9)
return SDValue();
unsigned RHSSignBits = DAG.ComputeNumSignBits(RHS);
if (RHSSignBits < 9)
return SDValue();
unsigned BitSize = VT.getSizeInBits();
unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
unsigned DivBits = BitSize - SignBits;
if (Sign)
++DivBits;
ISD::NodeType ToFp = Sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
ISD::NodeType ToInt = Sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
SDValue jq = DAG.getConstant(1, DL, IntVT);
if (Sign) {
// char|short jq = ia ^ ib;
jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
// jq = jq >> (bitsize - 2)
jq = DAG.getNode(ISD::SRA, DL, VT, jq,
DAG.getConstant(BitSize - 2, DL, VT));
// jq = jq | 0x1
jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
}
// int ia = (int)LHS;
SDValue ia = LHS;
  // int ib = (int)RHS;
SDValue ib = RHS;
// float fa = (float)ia;
SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
// float fb = (float)ib;
SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
// fq = trunc(fq);
fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
// float fqneg = -fq;
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
MachineFunction &MF = DAG.getMachineFunction();
bool UseFmadFtz = false;
if (Subtarget->isGCN()) {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
UseFmadFtz =
MFI->getMode().FP32Denormals != DenormalMode::getPreserveSign();
}
// float fr = mad(fqneg, fb, fa);
unsigned OpCode = !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA
: UseFmadFtz ? (unsigned)AMDGPUISD::FMAD_FTZ
: (unsigned)ISD::FMAD;
SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
// int iq = (int)fq;
SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
// fr = fabs(fr);
fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
// fb = fabs(fb);
fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// int cv = fr >= fb;
SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
// jq = (cv ? jq : 0);
jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
// dst = iq + jq;
SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
  // Rem needs compensation; it's easier to recompute it.
SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
// Truncate to number of bits this divide really is.
if (Sign) {
SDValue InRegSize
= DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), DivBits));
Div = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Div, InRegSize);
Rem = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Rem, InRegSize);
} else {
SDValue TruncMask = DAG.getConstant((UINT64_C(1) << DivBits) - 1, DL, VT);
Div = DAG.getNode(ISD::AND, DL, VT, Div, TruncMask);
Rem = DAG.getNode(ISD::AND, DL, VT, Rem, TruncMask);
}
return DAG.getMergeValues({ Div, Rem }, DL);
}
void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64");
EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
SDValue One = DAG.getConstant(1, DL, HalfVT);
SDValue Zero = DAG.getConstant(0, DL, HalfVT);
  // Hi/Lo split.
SDValue LHS_Lo, LHS_Hi;
SDValue LHS = Op.getOperand(0);
std::tie(LHS_Lo, LHS_Hi) = DAG.SplitScalar(LHS, DL, HalfVT, HalfVT);
SDValue RHS_Lo, RHS_Hi;
SDValue RHS = Op.getOperand(1);
std::tie(RHS_Lo, RHS_Hi) = DAG.SplitScalar(RHS, DL, HalfVT, HalfVT);
if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
LHS_Lo, RHS_Lo);
SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), Zero});
SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), Zero});
Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV));
Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM));
return;
}
if (isTypeLegal(MVT::i64)) {
// The algorithm here is based on ideas from "Software Integer Division",
// Tom Rodeheffer, August 2008.
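    // In outline: build an f32 approximation of RHS (Cvt_Hi * 2^32 + Cvt_Lo),
    // take its hardware reciprocal, and scale by roughly 2^64 (0x5f7ffffc)
    // to get a fixed-point estimate of 2^64/RHS, split into Rcp_Hi/Rcp_Lo via
    // the 2^-32 (0x2f800000) and -2^32 (0xcf800000) constants. Two UNR rounds
    // of Rcp += mulhu(Rcp, -RHS * Rcp) then sharpen the ~24-bit estimate
    // enough that the final quotient needs at most the two conditional
    // corrections below.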
MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// Compute denominator reciprocal.
unsigned FMAD =
!Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA
: MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign()
? (unsigned)ISD::FMAD
: (unsigned)AMDGPUISD::FMAD_FTZ;
SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
SDValue Mad1 = DAG.getNode(FMAD, DL, MVT::f32, Cvt_Hi,
DAG.getConstantFP(APInt(32, 0x4f800000).bitsToFloat(), DL, MVT::f32),
Cvt_Lo);
SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1);
SDValue Mul1 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Rcp,
DAG.getConstantFP(APInt(32, 0x5f7ffffc).bitsToFloat(), DL, MVT::f32));
SDValue Mul2 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Mul1,
DAG.getConstantFP(APInt(32, 0x2f800000).bitsToFloat(), DL, MVT::f32));
SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2);
SDValue Mad2 = DAG.getNode(FMAD, DL, MVT::f32, Trunc,
DAG.getConstantFP(APInt(32, 0xcf800000).bitsToFloat(), DL, MVT::f32),
Mul1);
SDValue Rcp_Lo = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Mad2);
SDValue Rcp_Hi = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Trunc);
SDValue Rcp64 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Rcp_Lo, Rcp_Hi}));
SDValue Zero64 = DAG.getConstant(0, DL, VT);
SDValue One64 = DAG.getConstant(1, DL, VT);
SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1);
SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1);
// First round of UNR (Unsigned integer Newton-Raphson).
SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS);
SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64);
SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1);
SDValue Mulhi1_Lo, Mulhi1_Hi;
std::tie(Mulhi1_Lo, Mulhi1_Hi) =
DAG.SplitScalar(Mulhi1, DL, HalfVT, HalfVT);
SDValue Add1_Lo = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Rcp_Lo,
Mulhi1_Lo, Zero1);
SDValue Add1_Hi = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Rcp_Hi,
Mulhi1_Hi, Add1_Lo.getValue(1));
SDValue Add1 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi}));
// Second round of UNR.
SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1);
SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2);
SDValue Mulhi2_Lo, Mulhi2_Hi;
std::tie(Mulhi2_Lo, Mulhi2_Hi) =
DAG.SplitScalar(Mulhi2, DL, HalfVT, HalfVT);
SDValue Add2_Lo = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Add1_Lo,
Mulhi2_Lo, Zero1);
SDValue Add2_Hi = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Add1_Hi,
Mulhi2_Hi, Add2_Lo.getValue(1));
SDValue Add2 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi}));
SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2);
SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3);
SDValue Mul3_Lo, Mul3_Hi;
std::tie(Mul3_Lo, Mul3_Hi) = DAG.SplitScalar(Mul3, DL, HalfVT, HalfVT);
SDValue Sub1_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, LHS_Lo,
Mul3_Lo, Zero1);
SDValue Sub1_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, LHS_Hi,
Mul3_Hi, Sub1_Lo.getValue(1));
SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi);
SDValue Sub1 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi}));
SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT);
SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero,
ISD::SETUGE);
SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero,
ISD::SETUGE);
SDValue C3 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, C2, C1, ISD::SETEQ);
    // TODO: Here and below, portions of the code could be enclosed in
    // if/endif. Currently the control flow is unconditional and we have 4
    // selects after the potential endif to substitute for PHIs.
// if C3 != 0 ...
SDValue Sub2_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub1_Lo,
RHS_Lo, Zero1);
SDValue Sub2_Mi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub1_Mi,
RHS_Hi, Sub1_Lo.getValue(1));
SDValue Sub2_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Mi,
Zero, Sub2_Lo.getValue(1));
SDValue Sub2 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi}));
SDValue Add3 = DAG.getNode(ISD::ADD, DL, VT, Mulhi3, One64);
SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero,
ISD::SETUGE);
SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero,
ISD::SETUGE);
SDValue C6 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, C5, C4, ISD::SETEQ);
// if (C6 != 0)
SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64);
SDValue Sub3_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Lo,
RHS_Lo, Zero1);
SDValue Sub3_Mi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Mi,
RHS_Hi, Sub2_Lo.getValue(1));
SDValue Sub3_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub3_Mi,
Zero, Sub3_Lo.getValue(1));
SDValue Sub3 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi}));
// endif C6
// endif C3
SDValue Sel1 = DAG.getSelectCC(DL, C6, Zero, Add4, Add3, ISD::SETNE);
SDValue Div = DAG.getSelectCC(DL, C3, Zero, Sel1, Mulhi3, ISD::SETNE);
SDValue Sel2 = DAG.getSelectCC(DL, C6, Zero, Sub3, Sub2, ISD::SETNE);
SDValue Rem = DAG.getSelectCC(DL, C3, Zero, Sel2, Sub1, ISD::SETNE);
Results.push_back(Div);
Results.push_back(Rem);
return;
}
  // r600 expansion.
  // Get speculative values.
SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, Zero, REM_Part, LHS_Hi, ISD::SETEQ);
SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, Zero});
REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM);
SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, Zero, DIV_Part, Zero, ISD::SETEQ);
SDValue DIV_Lo = Zero;
const unsigned halfBitWidth = HalfVT.getSizeInBits();
for (unsigned i = 0; i < halfBitWidth; ++i) {
const unsigned bitPos = halfBitWidth - i - 1;
SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
// Get value of high bit
SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, One);
HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
// Shift
REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
// Add LHS high bit
REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT);
SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, Zero, ISD::SETUGE);
DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
// Update REM
SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
}
SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {DIV_Lo, DIV_Hi});
DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV);
Results.push_back(DIV);
Results.push_back(REM);
}
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (VT == MVT::i64) {
SmallVector<SDValue, 2> Results;
LowerUDIVREM64(Op, DAG, Results);
return DAG.getMergeValues(Results, DL);
}
if (VT == MVT::i32) {
if (SDValue Res = LowerDIVREM24(Op, DAG, false))
return Res;
}
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
// See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the
// algorithm used here.
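  // In short: URECIP gives Z ~= 2^32/Y, one Newton-Raphson round refines it,
  // and two conditional add/sub fix-ups make the quotient and remainder
  // exact.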
// Initial estimate of inv(y).
SDValue Z = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Y);
// One round of UNR.
SDValue NegY = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Y);
SDValue NegYZ = DAG.getNode(ISD::MUL, DL, VT, NegY, Z);
Z = DAG.getNode(ISD::ADD, DL, VT, Z,
DAG.getNode(ISD::MULHU, DL, VT, Z, NegYZ));
// Quotient/remainder estimate.
SDValue Q = DAG.getNode(ISD::MULHU, DL, VT, X, Z);
SDValue R =
DAG.getNode(ISD::SUB, DL, VT, X, DAG.getNode(ISD::MUL, DL, VT, Q, Y));
// First quotient/remainder refinement.
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE);
Q = DAG.getNode(ISD::SELECT, DL, VT, Cond,
DAG.getNode(ISD::ADD, DL, VT, Q, One), Q);
R = DAG.getNode(ISD::SELECT, DL, VT, Cond,
DAG.getNode(ISD::SUB, DL, VT, R, Y), R);
// Second quotient/remainder refinement.
Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE);
Q = DAG.getNode(ISD::SELECT, DL, VT, Cond,
DAG.getNode(ISD::ADD, DL, VT, Q, One), Q);
R = DAG.getNode(ISD::SELECT, DL, VT, Cond,
DAG.getNode(ISD::SUB, DL, VT, R, Y), R);
return DAG.getMergeValues({Q, R}, DL);
}
SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue NegOne = DAG.getConstant(-1, DL, VT);
if (VT == MVT::i32) {
if (SDValue Res = LowerDIVREM24(Op, DAG, true))
return Res;
}
if (VT == MVT::i64 &&
DAG.ComputeNumSignBits(LHS) > 32 &&
DAG.ComputeNumSignBits(RHS) > 32) {
EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
    // Hi/Lo split.
SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
LHS_Lo, RHS_Lo);
SDValue Res[2] = {
DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
};
return DAG.getMergeValues(Res, DL);
}
SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
SDValue RSign = LHSign; // Remainder sign is the same as LHS
LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
SDValue Rem = Div.getValue(1);
Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
SDValue Res[2] = {
Div,
Rem
};
return DAG.getMergeValues(Res, DL);
}
// (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x)
SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
EVT VT = Op.getValueType();
auto Flags = Op->getFlags();
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags);
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags);
SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags);
// TODO: For f32 use FMAD instead if !hasFastFMA32?
return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags);
}
SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
// result = trunc(src)
// if (src > 0.0 && src != result)
// result += 1.0
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
// TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL,
SelectionDAG &DAG) {
const unsigned FractBits = 52;
const unsigned ExpBits = 11;
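  // The biased exponent occupies bits [52, 62] of the f64, i.e. bits [20, 30]
  // of the high dword, so extract ExpBits bits starting at bit FractBits - 32
  // and remove the 1023 bias.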
SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
Hi,
DAG.getConstant(FractBits - 32, SL, MVT::i32),
DAG.getConstant(ExpBits, SL, MVT::i32));
SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
DAG.getConstant(1023, SL, MVT::i32));
return Exp;
}
SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
assert(Op.getValueType() == MVT::f64);
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
// Extract the upper half, since this is where we will find the sign and
// exponent.
SDValue Hi = getHiHalf64(Src, DAG);
SDValue Exp = extractF64Exponent(Hi, SL, DAG);
const unsigned FractBits = 52;
// Extract the sign bit.
const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
// Extend back to 64-bits.
SDValue SignBit64 = DAG.getBuildVector(MVT::v2i32, SL, {Zero, SignBit});
SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
const SDValue FractMask
= DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
}
SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
assert(Op.getValueType() == MVT::f64);
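  // Classic magic-number rounding: adding copysign(2^52, x) forces the FPU to
  // round x to an integer (the ulp becomes 1.0), and subtracting it back
  // recovers the rounded value. Inputs with |x| > 0x1.fffffffffffffp+51 are
  // already integers, so the final select passes them through unchanged.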
APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
// TODO: Should this propagate fast-math-flags?
SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
}
SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op,
SelectionDAG &DAG) const {
// FNEARBYINT and FRINT are the same, except in their handling of FP
// exceptions. Those aren't really meaningful for us, and OpenCL only has
// rint, so just treat them as equivalent.
return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), Op.getValueType(),
Op.getOperand(0));
}
SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
auto VT = Op.getValueType();
auto Arg = Op.getOperand(0u);
return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);
}
// XXX - May require not supporting f32 denormals?
// Don't handle v2f16. The extra instructions to scalarize and repack around the
// compare and vselect end up producing worse code than scalarizing the whole
// operation.
SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
EVT VT = Op.getValueType();
SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X);
// TODO: Should this propagate fast-math-flags?
SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T);
SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff);
const SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
const SDValue One = DAG.getConstantFP(1.0, SL, VT);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
SDValue OneOrZeroFP = DAG.getNode(ISD::SELECT, SL, VT, Cmp, One, Zero);
SDValue SignedOffset = DAG.getNode(ISD::FCOPYSIGN, SL, VT, OneOrZeroFP, X);
return DAG.getNode(ISD::FADD, SL, VT, T, SignedOffset);
}
SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
// result = trunc(src);
// if (src < 0.0 && src != result)
// result += -1.0.
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
// TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
/// Return true if it's known that \p Src can never be an f32 denormal value.
static bool valueIsKnownNeverF32Denorm(SDValue Src) {
switch (Src.getOpcode()) {
case ISD::FP_EXTEND:
return Src.getOperand(0).getValueType() == MVT::f16;
case ISD::FP16_TO_FP:
case ISD::FFREXP:
return true;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID = Src.getConstantOperandVal(0);
switch (IntrinsicID) {
case Intrinsic::amdgcn_frexp_mant:
return true;
default:
return false;
}
}
default:
return false;
}
llvm_unreachable("covered opcode switch");
}
bool AMDGPUTargetLowering::allowApproxFunc(const SelectionDAG &DAG,
SDNodeFlags Flags) {
if (Flags.hasApproximateFuncs())
return true;
auto &Options = DAG.getTarget().Options;
return Options.UnsafeFPMath || Options.ApproxFuncFPMath;
}
bool AMDGPUTargetLowering::needsDenormHandlingF32(const SelectionDAG &DAG,
SDValue Src,
SDNodeFlags Flags) {
return !valueIsKnownNeverF32Denorm(Src) &&
DAG.getMachineFunction()
.getDenormalMode(APFloat::IEEEsingle())
.Input != DenormalMode::PreserveSign;
}
SDValue AMDGPUTargetLowering::getIsLtSmallestNormal(SelectionDAG &DAG,
SDValue Src,
SDNodeFlags Flags) const {
SDLoc SL(Src);
EVT VT = Src.getValueType();
const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
SDValue SmallestNormal =
DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT);
// Want to scale denormals up, but negatives and 0 work just as well on the
// scaled path.
SDValue IsLtSmallestNormal = DAG.getSetCC(
SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
SmallestNormal, ISD::SETOLT);
return IsLtSmallestNormal;
}
SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
SDNodeFlags Flags) const {
SDLoc SL(Src);
EVT VT = Src.getValueType();
const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), SL, VT);
SDValue Fabs = DAG.getNode(ISD::FABS, SL, VT, Src, Flags);
SDValue IsFinite = DAG.getSetCC(
SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Fabs,
Inf, ISD::SETOLT);
return IsFinite;
}
/// If denormal handling is required return the scaled input to FLOG2, and the
/// check for denormal range. Otherwise, return null values.
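/// Scaling multiplies a potential denormal input by 0x1.0p+32, moving it into
/// the normal range; callers compensate by subtracting 32 (scaled by the log
/// base conversion factor) from the final result.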
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
SDValue Src, SDNodeFlags Flags) const {
if (!needsDenormHandlingF32(DAG, Src, Flags))
return {};
MVT VT = MVT::f32;
const fltSemantics &Semantics = APFloat::IEEEsingle();
SDValue SmallestNormal =
DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT);
SDValue IsLtSmallestNormal = DAG.getSetCC(
SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
SmallestNormal, ISD::SETOLT);
SDValue Scale32 = DAG.getConstantFP(0x1.0p+32, SL, VT);
SDValue One = DAG.getConstantFP(1.0, SL, VT);
SDValue ScaleFactor =
DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags);
SDValue ScaledInput = DAG.getNode(ISD::FMUL, SL, VT, Src, ScaleFactor, Flags);
return {ScaledInput, IsLtSmallestNormal};
}
SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const {
// v_log_f32 is good enough for OpenCL, except it doesn't handle denormals.
// If we have to handle denormals, scale up the input and adjust the result.
// scaled = x * (is_denormal ? 0x1.0p+32 : 1.0)
// log2 = amdgpu_log2 - (is_denormal ? 32.0 : 0.0)
SDLoc SL(Op);
EVT VT = Op.getValueType();
SDValue Src = Op.getOperand(0);
SDNodeFlags Flags = Op->getFlags();
if (VT == MVT::f16) {
// Nothing in half is a denormal when promoted to f32.
assert(!Subtarget->has16BitInsts());
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
SDValue Log = DAG.getNode(AMDGPUISD::LOG, SL, MVT::f32, Ext, Flags);
return DAG.getNode(ISD::FP_ROUND, SL, VT, Log,
DAG.getTargetConstant(0, SL, MVT::i32), Flags);
}
auto [ScaledInput, IsLtSmallestNormal] =
getScaledLogInput(DAG, SL, Src, Flags);
if (!ScaledInput)
return DAG.getNode(AMDGPUISD::LOG, SL, VT, Src, Flags);
SDValue Log2 = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);
SDValue ThirtyTwo = DAG.getConstantFP(32.0, SL, VT);
SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
SDValue ResultOffset =
DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, ThirtyTwo, Zero);
return DAG.getNode(ISD::FSUB, SL, VT, Log2, ResultOffset, Flags);
}
static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X,
SDValue Y, SDValue C, SDNodeFlags Flags = SDNodeFlags()) {
SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Y, Flags);
return DAG.getNode(ISD::FADD, SL, VT, Mul, C, Flags);
}
SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
SelectionDAG &DAG) const {
SDValue X = Op.getOperand(0);
EVT VT = Op.getValueType();
SDNodeFlags Flags = Op->getFlags();
SDLoc DL(Op);
const bool IsLog10 = Op.getOpcode() == ISD::FLOG10;
assert(IsLog10 || Op.getOpcode() == ISD::FLOG);
const auto &Options = getTargetMachine().Options;
if (VT == MVT::f16 || Flags.hasApproximateFuncs() ||
Options.ApproxFuncFPMath || Options.UnsafeFPMath) {
if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
// Log and multiply in f32 is good enough for f16.
X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags);
}
SDValue Lowered = LowerFLOGUnsafe(X, DL, DAG, IsLog10, Flags);
if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
return DAG.getNode(ISD::FP_ROUND, DL, VT, Lowered,
DAG.getTargetConstant(0, DL, MVT::i32), Flags);
}
return Lowered;
}
auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, DL, X, Flags);
if (ScaledInput)
X = ScaledInput;
SDValue Y = DAG.getNode(AMDGPUISD::LOG, DL, VT, X, Flags);
SDValue R;
if (Subtarget->hasFastFMAF32()) {
// c+cc are ln(2)/ln(10) to more than 49 bits
const float c_log10 = 0x1.344134p-2f;
const float cc_log10 = 0x1.09f79ep-26f;
// c + cc is ln(2) to more than 49 bits
const float c_log = 0x1.62e42ep-1f;
const float cc_log = 0x1.efa39ep-25f;
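    // Split-constant multiply: R = Y*C is the high product, fma(Y, C, -R)
    // recovers the exact rounding error of that product, and fma(Y, CC, ...)
    // folds in the low bits of the constant, so R ends up close to Y*(C+CC)
    // at nearly double the working precision.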
SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT);
SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT);
R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags);
SDValue NegR = DAG.getNode(ISD::FNEG, DL, VT, R, Flags);
SDValue FMA0 = DAG.getNode(ISD::FMA, DL, VT, Y, C, NegR, Flags);
SDValue FMA1 = DAG.getNode(ISD::FMA, DL, VT, Y, CC, FMA0, Flags);
R = DAG.getNode(ISD::FADD, DL, VT, R, FMA1, Flags);
} else {
// ch+ct is ln(2)/ln(10) to more than 36 bits
const float ch_log10 = 0x1.344000p-2f;
const float ct_log10 = 0x1.3509f6p-18f;
// ch + ct is ln(2) to more than 36 bits
const float ch_log = 0x1.62e000p-1f;
const float ct_log = 0x1.0bfbe8p-15f;
SDValue CH = DAG.getConstantFP(IsLog10 ? ch_log10 : ch_log, DL, VT);
SDValue CT = DAG.getConstantFP(IsLog10 ? ct_log10 : ct_log, DL, VT);
SDValue YAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Y);
SDValue MaskConst = DAG.getConstant(0xfffff000, DL, MVT::i32);
SDValue YHInt = DAG.getNode(ISD::AND, DL, MVT::i32, YAsInt, MaskConst);
SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt);
SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags);
SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags);
SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags);
SDValue Mad1 = getMad(DAG, DL, VT, YT, CH, Mad0, Flags);
R = getMad(DAG, DL, VT, YH, CH, Mad1);
}
const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) &&
(Flags.hasNoInfs() || Options.NoInfsFPMath);
// TODO: Check if known finite from source value.
if (!IsFiniteOnly) {
SDValue IsFinite = getIsFinite(DAG, Y, Flags);
R = DAG.getNode(ISD::SELECT, DL, VT, IsFinite, R, Y, Flags);
}
if (IsScaled) {
SDValue Zero = DAG.getConstantFP(0.0f, DL, VT);
SDValue ShiftK =
DAG.getConstantFP(IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f, DL, VT);
SDValue Shift =
DAG.getNode(ISD::SELECT, DL, VT, IsScaled, ShiftK, Zero, Flags);
R = DAG.getNode(ISD::FSUB, DL, VT, R, Shift, Flags);
}
return R;
}
SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const {
return LowerFLOGCommon(Op, DAG);
}
// Do f32 fast-math expansion for flog2 or flog10. This is accurate enough for
// a promoted f16 operation.
SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL,
SelectionDAG &DAG, bool IsLog10,
SDNodeFlags Flags) const {
EVT VT = Src.getValueType();
unsigned LogOp =
VT == MVT::f32 ? (unsigned)AMDGPUISD::LOG : (unsigned)ISD::FLOG2;
double Log2BaseInverted =
IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2;
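  // log10(x) = log2(x) * (ln 2 / ln 10) and ln(x) = log2(x) * ln 2, so a
  // single multiply by Log2BaseInverted converts the hardware log2 result.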
if (VT == MVT::f32) {
auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, SL, Src, Flags);
if (ScaledInput) {
SDValue LogSrc = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);
SDValue ScaledResultOffset =
DAG.getConstantFP(-32.0 * Log2BaseInverted, SL, VT);
SDValue Zero = DAG.getConstantFP(0.0f, SL, VT);
SDValue ResultOffset = DAG.getNode(ISD::SELECT, SL, VT, IsScaled,
ScaledResultOffset, Zero, Flags);
SDValue Log2Inv = DAG.getConstantFP(Log2BaseInverted, SL, VT);
if (Subtarget->hasFastFMAF32())
return DAG.getNode(ISD::FMA, SL, VT, LogSrc, Log2Inv, ResultOffset,
Flags);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, LogSrc, Log2Inv, Flags);
return DAG.getNode(ISD::FADD, SL, VT, Mul, ResultOffset);
}
}
SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags);
SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);
return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand,
Flags);
}
SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
// v_exp_f32 is good enough for OpenCL, except it doesn't handle denormals.
// If we have to handle denormals, scale up the input and adjust the result.
SDLoc SL(Op);
EVT VT = Op.getValueType();
SDValue Src = Op.getOperand(0);
SDNodeFlags Flags = Op->getFlags();
if (VT == MVT::f16) {
// Nothing in half is a denormal when promoted to f32.
assert(!Subtarget->has16BitInsts());
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
SDValue Log = DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Ext, Flags);
return DAG.getNode(ISD::FP_ROUND, SL, VT, Log,
DAG.getTargetConstant(0, SL, MVT::i32), Flags);
}
assert(VT == MVT::f32);
if (!needsDenormHandlingF32(DAG, Src, Flags))
return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags);
// bool needs_scaling = x < -0x1.f80000p+6f;
// v_exp_f32(x + (s ? 0x1.0p+6f : 0.0f)) * (s ? 0x1.0p-64f : 1.0f);
// -nextafter(128.0, -1)
SDValue RangeCheckConst = DAG.getConstantFP(-0x1.f80000p+6f, SL, VT);
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue NeedsScaling =
DAG.getSetCC(SL, SetCCVT, Src, RangeCheckConst, ISD::SETOLT);
SDValue SixtyFour = DAG.getConstantFP(0x1.0p+6f, SL, VT);
SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
SDValue AddOffset =
DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, SixtyFour, Zero);
SDValue AddInput = DAG.getNode(ISD::FADD, SL, VT, Src, AddOffset, Flags);
SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, AddInput, Flags);
SDValue TwoExpNeg64 = DAG.getConstantFP(0x1.0p-64f, SL, VT);
SDValue One = DAG.getConstantFP(1.0, SL, VT);
SDValue ResultScale =
DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, TwoExpNeg64, One);
return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);
}
SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL,
SelectionDAG &DAG,
SDNodeFlags Flags) const {
EVT VT = X.getValueType();
const SDValue Log2E = DAG.getConstantFP(numbers::log2e, SL, VT);
if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) {
// exp2(M_LOG2E_F * f);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Log2E, Flags);
return DAG.getNode(VT == MVT::f32 ? (unsigned)AMDGPUISD::EXP
: (unsigned)ISD::FEXP2,
SL, VT, Mul, Flags);
}
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Threshold = DAG.getConstantFP(-0x1.5d58a0p+6f, SL, VT);
SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT);
SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+6f, SL, VT);
SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags);
SDValue AdjustedX =
DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
SDValue ExpInput = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, Log2E, Flags);
SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, ExpInput, Flags);
SDValue ResultScaleFactor = DAG.getConstantFP(0x1.969d48p-93f, SL, VT);
SDValue AdjustedResult =
DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScaleFactor, Flags);
return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, Exp2,
Flags);
}
/// Emit an approx-funcs-appropriate lowering for exp10. Inf/NaN should still
/// be handled correctly.
SDValue AMDGPUTargetLowering::lowerFEXP10Unsafe(SDValue X, const SDLoc &SL,
SelectionDAG &DAG,
SDNodeFlags Flags) const {
const EVT VT = X.getValueType();
const unsigned Exp2Op = VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2;
if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) {
// exp2(x * 0x1.a92000p+1f) * exp2(x * 0x1.4f0978p-11f);
SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT);
SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT);
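    // K0 + K1 approximates log2(10) more precisely than a single f32 constant
    // can, so exp2(x*K0) * exp2(x*K1) ~= exp2(x * log2(10)) = 10^x.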
SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, X, K0, Flags);
SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);
SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, X, K1, Flags);
SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);
return DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1);
}
// bool s = x < -0x1.2f7030p+5f;
// x += s ? 0x1.0p+5f : 0.0f;
// exp10 = exp2(x * 0x1.a92000p+1f) *
// exp2(x * 0x1.4f0978p-11f) *
// (s ? 0x1.9f623ep-107f : 1.0f);
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Threshold = DAG.getConstantFP(-0x1.2f7030p+5f, SL, VT);
SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT);
SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+5f, SL, VT);
SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags);
SDValue AdjustedX =
DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT);
SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT);
SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K0, Flags);
SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);
SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K1, Flags);
SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);
SDValue MulExps = DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1, Flags);
SDValue ResultScaleFactor = DAG.getConstantFP(0x1.9f623ep-107f, SL, VT);
SDValue AdjustedResult =
DAG.getNode(ISD::FMUL, SL, VT, MulExps, ResultScaleFactor, Flags);
return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, MulExps,
Flags);
}
SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
SDNodeFlags Flags = Op->getFlags();
const bool IsExp10 = Op.getOpcode() == ISD::FEXP10;
if (VT.getScalarType() == MVT::f16) {
// v_exp_f16 (fmul x, log2e)
if (allowApproxFunc(DAG, Flags)) // TODO: Does this really require fast?
return lowerFEXPUnsafe(X, SL, DAG, Flags);
if (VT.isVector())
return SDValue();
// exp(f16 x) ->
// fptrunc (v_exp_f32 (fmul (fpext x), log2e))
// Nothing in half is a denormal when promoted to f32.
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags);
SDValue Lowered = lowerFEXPUnsafe(Ext, SL, DAG, Flags);
return DAG.getNode(ISD::FP_ROUND, SL, VT, Lowered,
DAG.getTargetConstant(0, SL, MVT::i32), Flags);
}
assert(VT == MVT::f32);
// TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
// library behavior. Also, is known-not-daz source sufficient?
if (allowApproxFunc(DAG, Flags)) {
return IsExp10 ? lowerFEXP10Unsafe(X, SL, DAG, Flags)
: lowerFEXPUnsafe(X, SL, DAG, Flags);
}
// Algorithm:
//
// e^x = 2^(x/ln(2)) = 2^(x*(64/ln(2))/64)
//
// x*(64/ln(2)) = n + f, |f| <= 0.5, n is integer
// n = 64*m + j, 0 <= j < 64
//
// e^x = 2^((64*m + j + f)/64)
// = (2^m) * (2^(j/64)) * 2^(f/64)
// = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64))
//
// f = x*(64/ln(2)) - n
// r = f*(ln(2)/64) = x - n*(ln(2)/64)
//
// e^x = (2^m) * (2^(j/64)) * e^r
//
// (2^(j/64)) is precomputed
//
// e^r = 1 + r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5!
// e^r = 1 + q
//
// q = r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5!
//
// e^x = (2^m) * ( (2^(j/64)) + q*(2^(j/64)) )
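  //
  // The code below folds the 2^(j/64) table and polynomial steps into the
  // hardware exp2: PH + PL approximates x*log2(e) (or x*log2(10)) in extra
  // precision, E = rint(PH) supplies the 2^m scaling via FLDEXP, and exp2 of
  // the small remainder A covers the 2^(j/64) * e^r factor.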
SDNodeFlags FlagsNoContract = Flags;
FlagsNoContract.setAllowContract(false);
SDValue PH, PL;
if (Subtarget->hasFastFMAF32()) {
const float c_exp = numbers::log2ef;
const float cc_exp = 0x1.4ae0bep-26f; // c+cc are 49 bits
const float c_exp10 = 0x1.a934f0p+1f;
const float cc_exp10 = 0x1.2f346ep-24f;
SDValue C = DAG.getConstantFP(IsExp10 ? c_exp10 : c_exp, SL, VT);
SDValue CC = DAG.getConstantFP(IsExp10 ? cc_exp10 : cc_exp, SL, VT);
PH = DAG.getNode(ISD::FMUL, SL, VT, X, C, Flags);
SDValue NegPH = DAG.getNode(ISD::FNEG, SL, VT, PH, Flags);
SDValue FMA0 = DAG.getNode(ISD::FMA, SL, VT, X, C, NegPH, Flags);
PL = DAG.getNode(ISD::FMA, SL, VT, X, CC, FMA0, Flags);
} else {
const float ch_exp = 0x1.714000p+0f;
const float cl_exp = 0x1.47652ap-12f; // ch + cl are 36 bits
const float ch_exp10 = 0x1.a92000p+1f;
const float cl_exp10 = 0x1.4f0978p-11f;
SDValue CH = DAG.getConstantFP(IsExp10 ? ch_exp10 : ch_exp, SL, VT);
SDValue CL = DAG.getConstantFP(IsExp10 ? cl_exp10 : cl_exp, SL, VT);
SDValue XAsInt = DAG.getNode(ISD::BITCAST, SL, MVT::i32, X);
SDValue MaskConst = DAG.getConstant(0xfffff000, SL, MVT::i32);
SDValue XHAsInt = DAG.getNode(ISD::AND, SL, MVT::i32, XAsInt, MaskConst);
SDValue XH = DAG.getNode(ISD::BITCAST, SL, VT, XHAsInt);
SDValue XL = DAG.getNode(ISD::FSUB, SL, VT, X, XH, Flags);
PH = DAG.getNode(ISD::FMUL, SL, VT, XH, CH, Flags);
SDValue XLCL = DAG.getNode(ISD::FMUL, SL, VT, XL, CL, Flags);
SDValue Mad0 = getMad(DAG, SL, VT, XL, CH, XLCL, Flags);
PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags);
}
SDValue E = DAG.getNode(ISD::FROUNDEVEN, SL, VT, PH, Flags);
// It is unsafe to contract this fsub into the PH multiply.
SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, FlagsNoContract);
SDValue A = DAG.getNode(ISD::FADD, SL, VT, PHSubE, PL, Flags);
SDValue IntE = DAG.getNode(ISD::FP_TO_SINT, SL, MVT::i32, E);
SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, A, Flags);
SDValue R = DAG.getNode(ISD::FLDEXP, SL, VT, Exp2, IntE, Flags);
SDValue UnderflowCheckConst =
DAG.getConstantFP(IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f, SL, VT);
EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
SDValue Underflow =
DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT);
R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R);
const auto &Options = getTargetMachine().Options;
if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) {
SDValue OverflowCheckConst =
DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT);
SDValue Overflow =
DAG.getSetCC(SL, SetCCVT, X, OverflowCheckConst, ISD::SETOGT);
SDValue Inf =
DAG.getConstantFP(APFloat::getInf(APFloat::IEEEsingle()), SL, VT);
R = DAG.getNode(ISD::SELECT, SL, VT, Overflow, Inf, R);
}
return R;
}
static bool isCtlzOpc(unsigned Opc) {
return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
}
static bool isCttzOpc(unsigned Opc) {
return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF;
}
SDValue AMDGPUTargetLowering::lowerCTLZResults(SDValue Op,
SelectionDAG &DAG) const {
auto SL = SDLoc(Op);
auto Opc = Op.getOpcode();
auto Arg = Op.getOperand(0u);
auto ResultVT = Op.getValueType();
if (ResultVT != MVT::i8 && ResultVT != MVT::i16)
return {};
assert(isCtlzOpc(Opc));
assert(ResultVT == Arg.getValueType());
const uint64_t NumBits = ResultVT.getFixedSizeInBits();
SDValue NumExtBits = DAG.getConstant(32u - NumBits, SL, MVT::i32);
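  // For CTLZ_ZERO_UNDEF, shifting the narrow value into the top NumBits of an
  // i32 makes the 32-bit count equal the narrow count. For plain CTLZ,
  // zero-extend (so a zero input stays well-defined) and subtract the
  // 32 - NumBits extra leading zeros counted in the wide type.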
SDValue NewOp;
if (Opc == ISD::CTLZ_ZERO_UNDEF) {
NewOp = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Arg);
NewOp = DAG.getNode(ISD::SHL, SL, MVT::i32, NewOp, NumExtBits);
NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);
} else {
NewOp = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Arg);
NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);
NewOp = DAG.getNode(ISD::SUB, SL, MVT::i32, NewOp, NumExtBits);
}
return DAG.getNode(ISD::TRUNCATE, SL, ResultVT, NewOp);
}
SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
assert(isCtlzOpc(Op.getOpcode()) || isCttzOpc(Op.getOpcode()));
bool Ctlz = isCtlzOpc(Op.getOpcode());
unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32;
bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ||
Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF;
bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64;
if (Src.getValueType() == MVT::i32 || Is64BitScalar) {
// (ctlz hi:lo) -> (umin (ffbh src), 32)
// (cttz hi:lo) -> (umin (ffbl src), 32)
// (ctlz_zero_undef src) -> (ffbh src)
// (cttz_zero_undef src) -> (ffbl src)
    // The 64-bit scalar version produces a 32-bit result:
// (ctlz hi:lo) -> (umin (S_FLBIT_I32_B64 src), 64)
// (cttz hi:lo) -> (umin (S_FF1_I32_B64 src), 64)
// (ctlz_zero_undef src) -> (S_FLBIT_I32_B64 src)
// (cttz_zero_undef src) -> (S_FF1_I32_B64 src)
SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
if (!ZeroUndef) {
const SDValue ConstVal = DAG.getConstant(
Op.getValueType().getScalarSizeInBits(), SL, MVT::i32);
NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal);
}
return DAG.getNode(ISD::ZERO_EXTEND, SL, Src.getValueType(), NewOpr);
}
SDValue Lo, Hi;
std::tie(Lo, Hi) = split64BitValue(Src, DAG);
SDValue OprLo = DAG.getNode(NewOpc, SL, MVT::i32, Lo);
SDValue OprHi = DAG.getNode(NewOpc, SL, MVT::i32, Hi);
// (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64)
// (cttz hi:lo) -> (umin3 (uaddsat (ffbl hi), 32), (ffbl lo), 64)
// (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
// (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
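//
// For example, for x = 0x0000000100000000: ffbh(hi) = 31 and
// uaddsat(ffbh(lo), 32) = uaddsat(0xffffffff, 32) = 0xffffffff (again
// assuming ffbh(0) = -1), so umin3(31, 0xffffffff, 64) = 31 = ctlz(x).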
unsigned AddOpc = ZeroUndef ? ISD::ADD : ISD::UADDSAT;
const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
if (Ctlz)
OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32);
else
OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32);
SDValue NewOpr;
NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi);
if (!ZeroUndef) {
const SDValue Const64 = DAG.getConstant(64, SL, MVT::i32);
NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64);
}
return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
}
SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
// The regular method of converting a 64-bit integer to a float roughly
// consists of two steps: normalization and rounding. In fact, after
// normalization, the conversion from a 64-bit integer to a float is
// essentially the same as the one from a 32-bit integer. The only difference
// is that there are more trailing bits to be rounded. To leverage the native
// 32-bit conversion, a 64-bit integer can be preprocessed into a 32-bit
// integer and then converted into the correct float number. The basic steps
// for the unsigned conversion are illustrated in the following pseudo code:
//
// f32 uitofp(i64 u) {
// i32 hi, lo = split(u);
// // Only count the leading zeros in hi as we have native support of the
// // conversion from i32 to f32. If hi is all 0s, the conversion is
// // reduced to a 32-bit one automatically.
// i32 shamt = clz(hi); // Return 32 if hi is all 0s.
// u <<= shamt;
// hi, lo = split(u);
// hi |= (lo != 0) ? 1 : 0; // Adjust rounding bit in hi based on lo.
// // convert it as a 32-bit integer and scale the result back.
// return uitofp(hi) * 2^(32 - shamt);
// }
//
// The signed conversion follows the same principle but uses 'ffbh_i32' to
// count the leading sign bits instead. If 'ffbh_i32' is not available, the
// absolute value is converted instead, followed by negation based on the
// original sign bit.
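//
// For example, uitofp(i64 2^40 + 1): shamt = clz(0x100) = 23; after the
// shift, hi = 0x80000000 and lo = 0x00800000, so the sticky bit makes
// hi = 0x80000001, which rounds to 2^31 as f32; scaling by
// 2^(32 - 23) = 2^9 yields 2^40, the correctly rounded result.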
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
SDValue Lo, Hi;
std::tie(Lo, Hi) = split64BitValue(Src, DAG);
SDValue Sign;
SDValue ShAmt;
if (Signed && Subtarget->isGCN()) {
// We also need to consider the sign bit in Lo if Hi has just sign bits,
// i.e. Hi is 0 or -1. However, that only needs to take the MSB into
// account. That is, the maximal shift is
// - 32 if Lo and Hi have opposite signs;
// - 33 if Lo and Hi have the same sign.
//
// Or, MaxShAmt = 33 + OppositeSign, where
//
// OppositeSign is defined as ((Lo ^ Hi) >> 31), which is
// - -1 if Lo and Hi have opposite signs; and
// - 0 otherwise.
//
// All in all, ShAmt is calculated as
//
// umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1.
//
// or
//
// umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31).
//
// to reduce the critical path.
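//
// For example, Hi = 0 and Lo = 0x80000000 have opposite signs, so
// MaxShAmt = 32 + (-1) = 31; sffbh(0) - 1 wraps to a huge unsigned value,
// so the umin picks 31. Shifting by 31 keeps Lo's MSB just below the sign
// bit, and sitofp(hi_32(Src << 31)) scaled by 2^(32 - 31) recovers 2^31
// exactly.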
SDValue OppositeSign = DAG.getNode(
ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi),
DAG.getConstant(31, SL, MVT::i32));
SDValue MaxShAmt =
DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
OppositeSign);
// Count the leading sign bits.
ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);
// Different from unsigned conversion, the shift should be one bit less to
// preserve the sign bit.
ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt,
DAG.getConstant(1, SL, MVT::i32));
ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt);
} else {
if (Signed) {
// Without 'ffbh_i32', only leading zeros could be counted. Take the
// absolute value first.
Sign = DAG.getNode(ISD::SRA, SL, MVT::i64, Src,
DAG.getConstant(63, SL, MVT::i64));
SDValue Abs =
DAG.getNode(ISD::XOR, SL, MVT::i64,
DAG.getNode(ISD::ADD, SL, MVT::i64, Src, Sign), Sign);
std::tie(Lo, Hi) = split64BitValue(Abs, DAG);
}
// Count the leading zeros.
ShAmt = DAG.getNode(ISD::CTLZ, SL, MVT::i32, Hi);
// The shift amount for signed integers is [0, 32].
}
// Normalize the given 64-bit integer.
SDValue Norm = DAG.getNode(ISD::SHL, SL, MVT::i64, Src, ShAmt);
// Split it again.
std::tie(Lo, Hi) = split64BitValue(Norm, DAG);
// Calculate the adjust bit for rounding.
// (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo)
SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32,
DAG.getConstant(1, SL, MVT::i32), Lo);
// Get the 32-bit normalized integer.
Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust);
// Convert the normalized 32-bit integer into f32.
unsigned Opc =
(Signed && Subtarget->isGCN()) ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
SDValue FVal = DAG.getNode(Opc, SL, MVT::f32, Norm);
// Finally, need to scale back the converted floating number as the original
// 64-bit integer is converted as a 32-bit one.
ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
ShAmt);
// On GCN, use LDEXP directly.
if (Subtarget->isGCN())
return DAG.getNode(ISD::FLDEXP, SL, MVT::f32, FVal, ShAmt);
// Otherwise, align 'ShAmt' to the exponent part and add it into the exponent
// part directly to emulate the multiplication of 2^ShAmt. That 8-bit
// exponent is enough to avoid overflowing into the sign bit.
SDValue Exp = DAG.getNode(ISD::SHL, SL, MVT::i32, ShAmt,
DAG.getConstant(23, SL, MVT::i32));
SDValue IVal =
DAG.getNode(ISD::ADD, SL, MVT::i32,
DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp);
if (Signed) {
// Set the sign bit.
Sign = DAG.getNode(ISD::SHL, SL, MVT::i32,
DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Sign),
DAG.getConstant(31, SL, MVT::i32));
IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign);
}
return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal);
}
SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
SDValue Lo, Hi;
std::tie(Lo, Hi) = split64BitValue(Src, DAG);
SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
SL, MVT::f64, Hi);
SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi,
DAG.getConstant(32, SL, MVT::i32));
// TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
}
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
// TODO: Factor out code common with LowerSINT_TO_FP.
EVT DestVT = Op.getValueType();
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
if (SrcVT == MVT::i16) {
if (DestVT == MVT::f16)
return Op;
SDLoc DL(Op);
// Promote src to i32
SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
return DAG.getNode(ISD::UINT_TO_FP, DL, DestVT, Ext);
}
if (DestVT == MVT::bf16) {
SDLoc SL(Op);
SDValue ToF32 = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f32, Src);
SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SL, /*isTarget=*/true);
return DAG.getNode(ISD::FP_ROUND, SL, MVT::bf16, ToF32, FPRoundFlag);
}
if (SrcVT != MVT::i64)
return Op;
if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
SDLoc DL(Op);
SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
SDValue FPRoundFlag =
DAG.getIntPtrConstant(0, SDLoc(Op), /*isTarget=*/true);
SDValue FPRound =
DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
return FPRound;
}
if (DestVT == MVT::f32)
return LowerINT_TO_FP32(Op, DAG, false);
assert(DestVT == MVT::f64);
return LowerINT_TO_FP64(Op, DAG, false);
}
SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
EVT DestVT = Op.getValueType();
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
if (SrcVT == MVT::i16) {
if (DestVT == MVT::f16)
return Op;
SDLoc DL(Op);
// Promote src to i32
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, Src);
return DAG.getNode(ISD::SINT_TO_FP, DL, DestVT, Ext);
}
if (DestVT == MVT::bf16) {
SDLoc SL(Op);
SDValue ToF32 = DAG.getNode(ISD::SINT_TO_FP, SL, MVT::f32, Src);
SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SL, /*isTarget=*/true);
return DAG.getNode(ISD::FP_ROUND, SL, MVT::bf16, ToF32, FPRoundFlag);
}
if (SrcVT != MVT::i64)
return Op;
// TODO: Factor out code common with LowerUINT_TO_FP.
if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
SDValue FPRoundFlag =
DAG.getIntPtrConstant(0, SDLoc(Op), /*isTarget=*/true);
SDValue FPRound =
DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
return FPRound;
}
if (DestVT == MVT::f32)
return LowerINT_TO_FP32(Op, DAG, true);
assert(DestVT == MVT::f64);
return LowerINT_TO_FP64(Op, DAG, true);
}
SDValue AMDGPUTargetLowering::LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
assert(SrcVT == MVT::f32 || SrcVT == MVT::f64);
// The basic idea of converting a floating point number into a pair of 32-bit
// integers is illustrated as follows:
//
// tf := trunc(val);
// hif := floor(tf * 2^-32);
// lof := tf - hif * 2^32; // lof is always positive due to floor.
// hi := fptoi(hif);
// lo := fptoi(lof);
//
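// For example, for an f64 input val = 2^33 + 7: tf = val,
// hif = floor(tf * 2^-32) = 2 and lof = tf - 2 * 2^32 = 7, giving
// hi = 2 and lo = 7, i.e. the i64 value 0x200000007.
//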
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, SrcVT, Src);
SDValue Sign;
if (Signed && SrcVT == MVT::f32) {
// However, a 32-bit floating point number has only a 23-bit mantissa, which
// is not enough to hold all the significant bits of `lof` if val is
// negative. To avoid the loss of precision, we need to take the absolute
// value after truncating and flip the result back based on the original
// sign.
Sign = DAG.getNode(ISD::SRA, SL, MVT::i32,
DAG.getNode(ISD::BITCAST, SL, MVT::i32, Trunc),
DAG.getConstant(31, SL, MVT::i32));
Trunc = DAG.getNode(ISD::FABS, SL, SrcVT, Trunc);
}
SDValue K0, K1;
if (SrcVT == MVT::f64) {
K0 = DAG.getConstantFP(
llvm::bit_cast<double>(UINT64_C(/*2^-32*/ 0x3df0000000000000)), SL,
SrcVT);
K1 = DAG.getConstantFP(
llvm::bit_cast<double>(UINT64_C(/*-2^32*/ 0xc1f0000000000000)), SL,
SrcVT);
} else {
K0 = DAG.getConstantFP(
llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000)), SL, SrcVT);
K1 = DAG.getConstantFP(
llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000)), SL, SrcVT);
}
// TODO: Should this propagate fast-math-flags?
SDValue Mul = DAG.getNode(ISD::FMUL, SL, SrcVT, Trunc, K0);
SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, SrcVT, Mul);
SDValue Fma = DAG.getNode(ISD::FMA, SL, SrcVT, FloorMul, K1, Trunc);
SDValue Hi = DAG.getNode((Signed && SrcVT == MVT::f64) ? ISD::FP_TO_SINT
: ISD::FP_TO_UINT,
SL, MVT::i32, FloorMul);
SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64,
DAG.getBuildVector(MVT::v2i32, SL, {Lo, Hi}));
if (Signed && SrcVT == MVT::f32) {
assert(Sign);
// Flip the result based on the signedness, which is either all 0s or 1s.
Sign = DAG.getNode(ISD::BITCAST, SL, MVT::i64,
DAG.getBuildVector(MVT::v2i32, SL, {Sign, Sign}));
// r := xor(r, sign) - sign;
Result =
DAG.getNode(ISD::SUB, SL, MVT::i64,
DAG.getNode(ISD::XOR, SL, MVT::i64, Result, Sign), Sign);
}
return Result;
}
SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue N0 = Op.getOperand(0);
// Convert to target node to get known bits
if (N0.getValueType() == MVT::f32)
return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0);
if (getTargetMachine().Options.UnsafeFPMath) {
// There is a generic expand for FP_TO_FP16 with unsafe fast math.
return SDValue();
}
assert(N0.getSimpleValueType() == MVT::f64);
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
const unsigned ExpMask = 0x7ff;
const unsigned ExpBiasf64 = 1023;
const unsigned ExpBiasf16 = 15;
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue One = DAG.getConstant(1, DL, MVT::i32);
SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0);
SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U,
DAG.getConstant(32, DL, MVT::i64));
UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32);
U = DAG.getZExtOrTrunc(U, DL, MVT::i32);
SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
DAG.getConstant(20, DL, MVT::i64));
E = DAG.getNode(ISD::AND, DL, MVT::i32, E,
DAG.getConstant(ExpMask, DL, MVT::i32));
// Subtract the fp64 exponent bias (1023) to get the real exponent and
// add the f16 bias (15) to get the biased exponent for the f16 format.
E = DAG.getNode(ISD::ADD, DL, MVT::i32, E,
DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32));
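// For example, 1.0 has a raw f64 exponent of 1023, which rebiases to
// 1023 - 1023 + 15 = 15, exactly the f16 exponent field of 1.0.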
SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
DAG.getConstant(8, DL, MVT::i32));
M = DAG.getNode(ISD::AND, DL, MVT::i32, M,
DAG.getConstant(0xffe, DL, MVT::i32));
SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH,
DAG.getConstant(0x1ff, DL, MVT::i32));
MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U);
SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ);
M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set);
// (M != 0 ? 0x0200 : 0) | 0x7c00;
SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32,
DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32),
Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32));
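// I encodes the Inf/NaN result: 0x7c00 is the f16 Inf bit pattern, and a
// nonzero mantissa M turns it into a NaN by setting bit 9 (0x0200).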
// N = M | (E << 12);
SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M,
DAG.getNode(ISD::SHL, DL, MVT::i32, E,
DAG.getConstant(12, DL, MVT::i32)));
// B = clamp(1-E, 0, 13);
SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32,
One, E);
SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero);
B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B,
DAG.getConstant(13, DL, MVT::i32));
SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M,
DAG.getConstant(0x1000, DL, MVT::i32));
SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B);
SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B);
SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE);
D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1);
SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT);
SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V,
DAG.getConstant(0x7, DL, MVT::i32));
V = DAG.getNode(ISD::SRL, DL, MVT::i32, V,
DAG.getConstant(2, DL, MVT::i32));
SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32),
One, Zero, ISD::SETEQ);
SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32),
One, Zero, ISD::SETGT);
V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1);
V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1);
V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32),
DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT);
V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32),
I, V, ISD::SETEQ);
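// E > 30 exceeds the largest finite f16 biased exponent, so the result
// overflows to Inf (0x7c00); E == 1039 is the rebiased form of the raw
// f64 exponent 0x7ff (2047 - 1023 + 15), i.e. the input was Inf or NaN.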
// Extract the sign bit.
SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
DAG.getConstant(16, DL, MVT::i32));
Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign,
DAG.getConstant(0x8000, DL, MVT::i32));
V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V);
return DAG.getZExtOrTrunc(V, DL, Op.getValueType());
}
SDValue AMDGPUTargetLowering::LowerFP_TO_INT(const SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
unsigned OpOpcode = Op.getOpcode();
EVT SrcVT = Src.getValueType();
EVT DestVT = Op.getValueType();
// Will be selected natively
if (SrcVT == MVT::f16 && DestVT == MVT::i16)
return Op;
if (SrcVT == MVT::bf16) {
SDLoc DL(Op);
SDValue PromotedSrc = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
return DAG.getNode(Op.getOpcode(), DL, DestVT, PromotedSrc);
}
// Promote i16 to i32
if (DestVT == MVT::i16 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
SDLoc DL(Op);
SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src);
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToInt32);
}
if (DestVT != MVT::i64)
return Op;
if (SrcVT == MVT::f16 ||
(SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) {
SDLoc DL(Op);
SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src);
unsigned Ext =
OpOpcode == ISD::FP_TO_SINT ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(Ext, DL, MVT::i64, FpToInt32);
}
if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
return LowerFP_TO_INT64(Op, DAG, OpOpcode == ISD::FP_TO_SINT);
return SDValue();
}
SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
MVT VT = Op.getSimpleValueType();
MVT ScalarVT = VT.getScalarType();
assert(VT.isVector());
SDValue Src = Op.getOperand(0);
SDLoc DL(Op);
// TODO: Don't scalarize on Evergreen?
unsigned NElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Args;
DAG.ExtractVectorElements(Src, Args, 0, NElts);
SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
for (unsigned I = 0; I < NElts; ++I)
Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
return DAG.getBuildVector(VT, DL, Args);
}
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
static bool isU24(SDValue Op, SelectionDAG &DAG) {
return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24;
}
static bool isI24(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
// as unsigned 24-bit values.
AMDGPUTargetLowering::numBitsSigned(Op, DAG) <= 24;
}
static SDValue simplifyMul24(SDNode *Node24,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0);
SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1);
unsigned NewOpcode = Node24->getOpcode();
if (IsIntrin) {
unsigned IID = Node24->getConstantOperandVal(0);
switch (IID) {
case Intrinsic::amdgcn_mul_i24:
NewOpcode = AMDGPUISD::MUL_I24;
break;
case Intrinsic::amdgcn_mul_u24:
NewOpcode = AMDGPUISD::MUL_U24;
break;
case Intrinsic::amdgcn_mulhi_i24:
NewOpcode = AMDGPUISD::MULHI_I24;
break;
case Intrinsic::amdgcn_mulhi_u24:
NewOpcode = AMDGPUISD::MULHI_U24;
break;
default:
llvm_unreachable("Expected 24-bit mul intrinsic");
}
}
APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
// First try to simplify using SimplifyMultipleUseDemandedBits which allows
// the operands to have other uses, but will only perform simplifications that
// involve bypassing some nodes for this user.
SDValue DemandedLHS = TLI.SimplifyMultipleUseDemandedBits(LHS, Demanded, DAG);
SDValue DemandedRHS = TLI.SimplifyMultipleUseDemandedBits(RHS, Demanded, DAG);
if (DemandedLHS || DemandedRHS)
return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(),
DemandedLHS ? DemandedLHS : LHS,
DemandedRHS ? DemandedRHS : RHS);
// Now try SimplifyDemandedBits which can simplify the nodes used by our
// operands if this node is the only user.
if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
return SDValue(Node24, 0);
if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))
return SDValue(Node24, 0);
return SDValue();
}
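// Constant-fold a bitfield extract. For example, the unsigned variant with
// Src0 = 0xABCD, Offset = 4, Width = 8 shifts left by 20 and logically
// right by 24, yielding (0xABCD >> 4) & 0xff = 0xBC; the signed variant
// uses an arithmetic shift and therefore sign-extends the extracted field.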
template <typename IntTy>
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset,
uint32_t Width, const SDLoc &DL) {
if (Width + Offset < 32) {
uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
return DAG.getConstant(Result, DL, MVT::i32);
}
return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
}
static bool hasVolatileUser(SDNode *Val) {
for (SDNode *U : Val->uses()) {
if (MemSDNode *M = dyn_cast<MemSDNode>(U)) {
if (M->isVolatile())
return true;
}
}
return false;
}
bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const {
// i32 vectors are the canonical memory type.
if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT))
return false;
if (!VT.isByteSized())
return false;
unsigned Size = VT.getStoreSize();
if ((Size == 1 || Size == 2 || Size == 4) && !VT.isVector())
return false;
if (Size == 3 || (Size > 4 && (Size % 4 != 0)))
return false;
return true;
}
// Replace load of an illegal type with a store of a bitcast to a friendlier
// type.
SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (!DCI.isBeforeLegalize())
return SDValue();
LoadSDNode *LN = cast<LoadSDNode>(N);
if (!LN->isSimple() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
return SDValue();
SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;
EVT VT = LN->getMemoryVT();
unsigned Size = VT.getStoreSize();
Align Alignment = LN->getAlign();
if (Alignment < Size && isTypeLegal(VT)) {
unsigned IsFast;
unsigned AS = LN->getAddressSpace();
// Expand unaligned loads earlier than legalization. Due to visitation order
// problems during legalization, the emitted instructions to pack and unpack
// the bytes again are not eliminated in the case of an unaligned copy.
if (!allowsMisalignedMemoryAccesses(
VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) {
if (VT.isVector())
return SplitVectorLoad(SDValue(LN, 0), DAG);
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
return DAG.getMergeValues(Ops, SDLoc(N));
}
if (!IsFast)
return SDValue();
}
if (!shouldCombineMemoryType(VT))
return SDValue();
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
SDValue NewLoad
= DAG.getLoad(NewVT, SL, LN->getChain(),
LN->getBasePtr(), LN->getMemOperand());
SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad);
DCI.CombineTo(N, BC, NewLoad.getValue(1));
return SDValue(N, 0);
}
// Replace store of an illegal type with a store of a bitcast to a friendlier
// type.
SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (!DCI.isBeforeLegalize())
return SDValue();
StoreSDNode *SN = cast<StoreSDNode>(N);
if (!SN->isSimple() || !ISD::isNormalStore(SN))
return SDValue();
EVT VT = SN->getMemoryVT();
unsigned Size = VT.getStoreSize();
SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;
Align Alignment = SN->getAlign();
if (Alignment < Size && isTypeLegal(VT)) {
unsigned IsFast;
unsigned AS = SN->getAddressSpace();
// Expand unaligned stores earlier than legalization. Due to visitation
// order problems during legalization, the emitted instructions to pack and
// unpack the bytes again are not eliminated in the case of an unaligned
// copy.
if (!allowsMisalignedMemoryAccesses(
VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) {
if (VT.isVector())
return SplitVectorStore(SDValue(SN, 0), DAG);
return expandUnalignedStore(SN, DAG);
}
if (!IsFast)
return SDValue();
}
if (!shouldCombineMemoryType(VT))
return SDValue();
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
SDValue Val = SN->getValue();
//DCI.AddToWorklist(Val.getNode());
bool OtherUses = !Val.hasOneUse();
SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val);
if (OtherUses) {
SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal);
DAG.ReplaceAllUsesOfValueWith(Val, CastBack);
}
return DAG.getStore(SN->getChain(), SL, CastVal,
SN->getBasePtr(), SN->getMemOperand());
}
// FIXME: This should go in generic DAG combiner with an isTruncateFree check,
// but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU
// issues.
SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
// (vt2 (assertzext (truncate vt0:x), vt1)) ->
// (vt2 (truncate (assertzext vt0:x, vt1)))
if (N0.getOpcode() == ISD::TRUNCATE) {
SDValue N1 = N->getOperand(1);
EVT ExtVT = cast<VTSDNode>(N1)->getVT();
SDLoc SL(N);
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
if (SrcVT.bitsGE(ExtVT)) {
SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1);
return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg);
}
}
return SDValue();
}
SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
SDNode *N, DAGCombinerInfo &DCI) const {
unsigned IID = N->getConstantOperandVal(0);
switch (IID) {
case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_mul_u24:
case Intrinsic::amdgcn_mulhi_i24:
case Intrinsic::amdgcn_mulhi_u24:
return simplifyMul24(N, DCI);
case Intrinsic::amdgcn_fract:
case Intrinsic::amdgcn_rsq:
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_rsq_clamp: {
// FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
SDValue Src = N->getOperand(1);
return Src.isUndef() ? Src : SDValue();
}
case Intrinsic::amdgcn_frexp_exp: {
// frexp_exp (fneg x) -> frexp_exp x
// frexp_exp (fabs x) -> frexp_exp x
// frexp_exp (fneg (fabs x)) -> frexp_exp x
SDValue Src = N->getOperand(1);
SDValue PeekSign = peekFPSignOps(Src);
if (PeekSign == Src)
return SDValue();
return SDValue(DCI.DAG.UpdateNodeOperands(N, N->getOperand(0), PeekSign),
0);
}
default:
return SDValue();
}
}
/// Split the 64-bit value \p LHS into two 32-bit components, and perform the
/// binary operation \p Opc to it with the corresponding constant operands.
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
DAGCombinerInfo &DCI, const SDLoc &SL,
unsigned Opc, SDValue LHS,
uint32_t ValLo, uint32_t ValHi) const {
SelectionDAG &DAG = DCI.DAG;
SDValue Lo, Hi;
std::tie(Lo, Hi) = split64BitValue(LHS, DAG);
SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32);
SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32);
SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS);
SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS);
// Re-visit the ands. It's possible we eliminated one of them and it could
// simplify the vector.
DCI.AddToWorklist(Lo.getNode());
DCI.AddToWorklist(Hi.getNode());
SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {LoAnd, HiAnd});
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
}
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!RHS)
return SDValue();
SDValue LHS = N->getOperand(0);
unsigned RHSVal = RHS->getZExtValue();
if (!RHSVal)
return LHS;
SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;
switch (LHS->getOpcode()) {
default:
break;
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
SDValue X = LHS->getOperand(0);
if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 &&
isOperationLegal(ISD::BUILD_VECTOR, MVT::v2i16)) {
// Prefer build_vector as the canonical form if packed types are legal.
// (shl ([asz]ext i16:x), 16) -> build_vector 0, x
SDValue Vec = DAG.getBuildVector(MVT::v2i16, SL,
{ DAG.getConstant(0, SL, MVT::i16), LHS->getOperand(0) });
return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
}
// shl (ext x) => zext (shl x), if shift does not overflow int
if (VT != MVT::i64)
break;
KnownBits Known = DAG.computeKnownBits(X);
unsigned LZ = Known.countMinLeadingZeros();
if (LZ < RHSVal)
break;
EVT XVT = X.getValueType();
SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
return DAG.getZExtOrTrunc(Shl, SL, VT);
}
}
if (VT != MVT::i64)
return SDValue();
// i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
// On some subtargets, 64-bit shift is a quarter rate instruction. In the
// common case, splitting this into a move and a 32-bit shift is faster and
// the same code size.
if (RHSVal < 32)
return SDValue();
SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);
SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt);
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {Zero, NewShift});
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
}
SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (N->getValueType(0) != MVT::i64)
return SDValue();
const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!RHS)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
unsigned RHSVal = RHS->getZExtValue();
// (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31)
if (RHSVal == 32) {
SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
DAG.getConstant(31, SL, MVT::i32));
SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift});
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
}
// (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31)
if (RHSVal == 63) {
SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
DAG.getConstant(31, SL, MVT::i32));
SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift});
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
}
return SDValue();
}
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!RHS)
return SDValue();
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
unsigned ShiftAmt = RHS->getZExtValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
// fold (srl (and x, c1 << c2), c2) -> (and (srl x, c2), c1)
// This improves the ability to match BFE patterns in isel.
if (LHS.getOpcode() == ISD::AND) {
if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
unsigned MaskIdx, MaskLen;
if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) &&
MaskIdx == ShiftAmt) {
return DAG.getNode(
ISD::AND, SL, VT,
DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
}
}
}
if (VT != MVT::i64)
return SDValue();
if (ShiftAmt < 32)
return SDValue();
// srl i64:x, C for C >= 32
// =>
// build_pair (srl hi_32(x), C - 32), 0
SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
SDValue Hi = getHiHalf64(LHS, DAG);
SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
SDValue BuildPair = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, Zero});
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
}
SDValue AMDGPUTargetLowering::performTruncateCombine(
SDNode *N, DAGCombinerInfo &DCI) const {
SDLoc SL(N);
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
// vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {
SDValue Vec = Src.getOperand(0);
if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
SDValue Elt0 = Vec.getOperand(0);
EVT EltVT = Elt0.getValueType();
if (VT.getFixedSizeInBits() <= EltVT.getFixedSizeInBits()) {
if (EltVT.isFloatingPoint()) {
Elt0 = DAG.getNode(ISD::BITCAST, SL,
EltVT.changeTypeToInteger(), Elt0);
}
return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0);
}
}
}
// Equivalent of above for accessing the high element of a vector as an
// integer operation.
// trunc (srl (bitcast (build_vector x, y))), 16 -> trunc (bitcast y)
if (Src.getOpcode() == ISD::SRL && !VT.isVector()) {
if (auto K = isConstOrConstSplat(Src.getOperand(1))) {
if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) {
SDValue BV = stripBitcast(Src.getOperand(0));
if (BV.getOpcode() == ISD::BUILD_VECTOR &&
BV.getValueType().getVectorNumElements() == 2) {
SDValue SrcElt = BV.getOperand(1);
EVT SrcEltVT = SrcElt.getValueType();
if (SrcEltVT.isFloatingPoint()) {
SrcElt = DAG.getNode(ISD::BITCAST, SL,
SrcEltVT.changeTypeToInteger(), SrcElt);
}
return DAG.getNode(ISD::TRUNCATE, SL, VT, SrcElt);
}
}
}
}
// Partially shrink 64-bit shifts to 32-bit if reduced to 16-bit.
//
// i16 (trunc (srl i64:x, K)), K <= 16 ->
// i16 (trunc (srl (i32 (trunc x), K)))
if (VT.getScalarSizeInBits() < 32) {
EVT SrcVT = Src.getValueType();
if (SrcVT.getScalarSizeInBits() > 32 &&
(Src.getOpcode() == ISD::SRL ||
Src.getOpcode() == ISD::SRA ||
Src.getOpcode() == ISD::SHL)) {
SDValue Amt = Src.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
// - For left shifts, do the transform as long as the shift
// amount is still legal for i32, so when ShiftAmt < 32 (<= 31)
// - For right shift, do it if ShiftAmt <= (32 - Size) to avoid
// losing information stored in the high bits when truncating.
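// For example, i16 (trunc (srl i64:x, 16)) is safe since 16 <= 32 - 16,
// but a right shift by 20 would need bits above bit 31 of x, which the
// truncation to i32 discards.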
const unsigned MaxCstSize =
(Src.getOpcode() == ISD::SHL) ? 31 : (32 - VT.getScalarSizeInBits());
if (Known.getMaxValue().ule(MaxCstSize)) {
EVT MidVT = VT.isVector() ?
EVT::getVectorVT(*DAG.getContext(), MVT::i32,
VT.getVectorNumElements()) : MVT::i32;
EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout());
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT,
Src.getOperand(0));
DCI.AddToWorklist(Trunc.getNode());
if (Amt.getValueType() != NewShiftVT) {
Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT);
DCI.AddToWorklist(Amt.getNode());
}
SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,
Trunc, Amt);
return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift);
}
}
}
return SDValue();
}
// We need to specifically handle i64 mul here to avoid unnecessary conversion
// instructions. If we only match on the legalized i64 mul expansion,
// SimplifyDemandedBits will be unable to remove them because there will be
// multiple uses due to the separate mul + mulh[su].
static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
SDValue N0, SDValue N1, unsigned Size, bool Signed) {
if (Size <= 32) {
unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1);
}
unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;
SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);
SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, MulLo, MulHi);
}
/// If \p V is an add of a constant 1, returns the other operand. Otherwise
/// return SDValue().
static SDValue getAddOneOp(const SDNode *V) {
if (V->getOpcode() != ISD::ADD)
return SDValue();
return isOneConstant(V->getOperand(1)) ? V->getOperand(0) : SDValue();
}
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::MUL);
EVT VT = N->getValueType(0);
// Don't generate 24-bit multiplies on values that are in SGPRs, since
// we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
// unnecessarily). isDivergent() is used as an approximation of whether the
// value is in an SGPR.
if (!N->isDivergent())
return SDValue();
unsigned Size = VT.getSizeInBits();
if (VT.isVector() || Size > 64)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Undo InstCombine canonicalize X * (Y + 1) -> X * Y + X to enable mad
// matching.
// mul x, (add y, 1) -> add (mul x, y), x
auto IsFoldableAdd = [](SDValue V) -> SDValue {
SDValue AddOp = getAddOneOp(V.getNode());
if (!AddOp)
return SDValue();
if (V.hasOneUse() || all_of(V->uses(), [](const SDNode *U) -> bool {
return U->getOpcode() == ISD::MUL;
}))
return AddOp;
return SDValue();
};
// FIXME: The selection pattern is not properly checking for commuted
// operands, so we have to place the mul in the LHS
if (SDValue MulOper = IsFoldableAdd(N0)) {
SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N1, MulOper);
return DAG.getNode(ISD::ADD, DL, VT, MulVal, N1);
}
if (SDValue MulOper = IsFoldableAdd(N1)) {
SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N0, MulOper);
return DAG.getNode(ISD::ADD, DL, VT, MulVal, N0);
}
// There are i16 integer mul/mad.
if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16))
return SDValue();
// SimplifyDemandedBits has the annoying habit of turning useful zero_extends
// in the source into any_extends if the result of the mul is truncated. Since
// we can assume the high bits are whatever we want, use the underlying value
// to prevent the unknown high bits from interfering.
if (N0.getOpcode() == ISD::ANY_EXTEND)
N0 = N0.getOperand(0);
if (N1.getOpcode() == ISD::ANY_EXTEND)
N1 = N1.getOperand(0);
SDValue Mul;
if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
Mul = getMul24(DAG, DL, N0, N1, Size, false);
} else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
Mul = getMul24(DAG, DL, N0, N1, Size, true);
} else {
return SDValue();
}
// We need to use sext even for MUL_U24, because MUL_U24 is used
// for signed multiply of 8- and 16-bit types.
return DAG.getSExtOrTrunc(Mul, DL, VT);
}
SDValue
AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (N->getValueType(0) != MVT::i32)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+ bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// SimplifyDemandedBits has the annoying habit of turning useful zero_extends
// in the source into any_extends if the result of the mul is truncated. Since
// we can assume the high bits are whatever we want, use the underlying value
// to prevent the unknown high bits from interfering.
if (N0.getOpcode() == ISD::ANY_EXTEND)
N0 = N0.getOperand(0);
if (N1.getOpcode() == ISD::ANY_EXTEND)
N1 = N1.getOperand(0);
// Try to use two fast 24-bit multiplies (one for each half of the result)
// instead of one slow extending multiply.
- unsigned LoOpcode, HiOpcode;
- if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
- N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
- LoOpcode = AMDGPUISD::MUL_U24;
- HiOpcode = AMDGPUISD::MULHI_U24;
- } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
- N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
- LoOpcode = AMDGPUISD::MUL_I24;
- HiOpcode = AMDGPUISD::MULHI_I24;
+ unsigned LoOpcode = 0;
+ unsigned HiOpcode = 0;
+ if (Signed) {
+ if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+ N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+ LoOpcode = AMDGPUISD::MUL_I24;
+ HiOpcode = AMDGPUISD::MULHI_I24;
+ }
} else {
- return SDValue();
+ if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+ N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+ LoOpcode = AMDGPUISD::MUL_U24;
+ HiOpcode = AMDGPUISD::MULHI_U24;
+ }
}
+ if (!LoOpcode)
+ return SDValue();
SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
DCI.CombineTo(N, Lo, Hi);
return SDValue(N, 0);
}
SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
if (!Subtarget->hasMulI24() || VT.isVector())
return SDValue();
// Don't generate 24-bit multiplies on values that are in SGPRs, since
// we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
// unnecessarily). isDivergent() is used as an approximation of whether the
// value is in an SGPR.
// This doesn't apply if no s_mul_hi is available (since we'll end up with a
// valu op anyway)
if (Subtarget->hasSMulHi() && !N->isDivergent())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (!isI24(N0, DAG) || !isI24(N1, DAG))
return SDValue();
N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1);
DCI.AddToWorklist(Mulhi.getNode());
return DAG.getSExtOrTrunc(Mulhi, DL, VT);
}
SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32)
return SDValue();
// Don't generate 24-bit multiplies on values that are in SGPRs, since
// we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
// unnecessarily). isDivergent() is used as an approximation of whether the
// value is in an SGPR.
// This doesn't apply if no s_mul_hi is available (since we'll end up with a
// valu op anyway)
if (Subtarget->hasSMulHi() && !N->isDivergent())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (!isU24(N0, DAG) || !isU24(N1, DAG))
return SDValue();
N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1);
DCI.AddToWorklist(Mulhi.getNode());
return DAG.getZExtOrTrunc(Mulhi, DL, VT);
}
SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG,
SDValue Op,
const SDLoc &DL,
unsigned Opc) const {
EVT VT = Op.getValueType();
EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT);
if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() &&
LegalVT != MVT::i16))
return SDValue();
if (VT != MVT::i32)
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op);
SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op);
if (VT != MVT::i32)
FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX);
return FFBX;
}
// The native instructions return -1 on 0 input. Optimize out a select that
// produces -1 on 0.
//
// TODO: If zero is not undef, we could also do this if the output is compared
// against the bitwidth.
//
// TODO: Should probably combine against FFBH_U32 instead of ctlz directly.
SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond,
SDValue LHS, SDValue RHS,
DAGCombinerInfo &DCI) const {
if (!isNullConstant(Cond.getOperand(1)))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
SDValue CmpLHS = Cond.getOperand(0);
// select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
// select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x
if (CCOpcode == ISD::SETEQ &&
(isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
RHS.getOperand(0) == CmpLHS && isAllOnesConstant(LHS)) {
unsigned Opc =
isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
return getFFBX_U32(DAG, CmpLHS, SL, Opc);
}
// select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
// select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x
if (CCOpcode == ISD::SETNE &&
(isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) &&
LHS.getOperand(0) == CmpLHS && isAllOnesConstant(RHS)) {
unsigned Opc =
isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
return getFFBX_U32(DAG, CmpLHS, SL, Opc);
}
return SDValue();
}
static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI,
unsigned Op,
const SDLoc &SL,
SDValue Cond,
SDValue N1,
SDValue N2) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N1.getValueType();
SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond,
N1.getOperand(0), N2.getOperand(0));
DCI.AddToWorklist(NewSelect.getNode());
return DAG.getNode(Op, SL, VT, NewSelect);
}
// Pull a free FP operation out of a select so it may fold into uses.
//
// select c, (fneg x), (fneg y) -> fneg (select c, x, y)
// select c, (fneg x), k -> fneg (select c, x, (fneg k))
//
// select c, (fabs x), (fabs y) -> fabs (select c, x, y)
// select c, (fabs x), +k -> fabs (select c, x, k)
SDValue
AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
SDValue N) const {
SelectionDAG &DAG = DCI.DAG;
SDValue Cond = N.getOperand(0);
SDValue LHS = N.getOperand(1);
SDValue RHS = N.getOperand(2);
EVT VT = N.getValueType();
if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
(LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
return SDValue();
return distributeOpThroughSelect(DCI, LHS.getOpcode(),
SDLoc(N), Cond, LHS, RHS);
}
bool Inv = false;
if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) {
std::swap(LHS, RHS);
Inv = true;
}
// TODO: Support vector constants.
ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS &&
!selectSupportsSourceMods(N.getNode())) {
SDLoc SL(N);
// If one side is an fneg/fabs and the other is a constant, we can push the
// fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
SDValue NewLHS = LHS.getOperand(0);
SDValue NewRHS = RHS;
// Careful: if the neg can be folded up, don't try to pull it back down.
bool ShouldFoldNeg = true;
if (NewLHS.hasOneUse()) {
unsigned Opc = NewLHS.getOpcode();
if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(NewLHS.getNode()))
ShouldFoldNeg = false;
if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
ShouldFoldNeg = false;
}
if (ShouldFoldNeg) {
if (LHS.getOpcode() == ISD::FABS && CRHS->isNegative())
return SDValue();
// We're going to be forced to use a source modifier anyway, there's no
// point to pulling the negate out unless we can get a size reduction by
// negating the constant.
//
// TODO: Generalize to use getCheaperNegatedExpression which doesn't know
// about cheaper constants.
if (NewLHS.getOpcode() == ISD::FABS &&
getConstantNegateCost(CRHS) != NegatibleCost::Cheaper)
return SDValue();
if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
return SDValue();
if (LHS.getOpcode() == ISD::FNEG)
NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
if (Inv)
std::swap(NewLHS, NewRHS);
SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
Cond, NewLHS, NewRHS);
DCI.AddToWorklist(NewSelect.getNode());
return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
}
}
return SDValue();
}
SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
return Folded;
SDValue Cond = N->getOperand(0);
if (Cond.getOpcode() != ISD::SETCC)
return SDValue();
EVT VT = N->getValueType(0);
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
SDValue CC = Cond.getOperand(2);
SDValue True = N->getOperand(1);
SDValue False = N->getOperand(2);
if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
SelectionDAG &DAG = DCI.DAG;
if (DAG.isConstantValueOfAnyType(True) &&
!DAG.isConstantValueOfAnyType(False)) {
// Swap cmp + select pair to move constant to false input.
// This will allow using VOPC cndmasks more often.
// select (setcc x, y), k, x -> select (setccinv x, y), x, k
SDLoc SL(N);
ISD::CondCode NewCC =
getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType());
SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
}
if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
SDValue MinMax
= combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
// Revisit this node so we can catch min3/max3/med3 patterns.
//DCI.AddToWorklist(MinMax.getNode());
return MinMax;
}
}
// There's no reason to not do this if the condition has other uses.
return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
}
static bool isInv2Pi(const APFloat &APF) {
static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));
return APF.bitwiseIsEqual(KF16) ||
APF.bitwiseIsEqual(KF32) ||
APF.bitwiseIsEqual(KF64);
}
// 0 and 1.0 / (2.0 * pi) do not have inline immediates, so there is an
// additional cost to negate them.
TargetLowering::NegatibleCost
AMDGPUTargetLowering::getConstantNegateCost(const ConstantFPSDNode *C) const {
if (C->isZero())
return C->isNegative() ? NegatibleCost::Cheaper : NegatibleCost::Expensive;
if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))
return C->isNegative() ? NegatibleCost::Cheaper : NegatibleCost::Expensive;
return NegatibleCost::Neutral;
}
bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
return getConstantNegateCost(C) == NegatibleCost::Expensive;
return false;
}
bool AMDGPUTargetLowering::isConstantCheaperToNegate(SDValue N) const {
if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
return getConstantNegateCost(C) == NegatibleCost::Cheaper;
return false;
}
static unsigned inverseMinMax(unsigned Opc) {
switch (Opc) {
case ISD::FMAXNUM:
return ISD::FMINNUM;
case ISD::FMINNUM:
return ISD::FMAXNUM;
case ISD::FMAXNUM_IEEE:
return ISD::FMINNUM_IEEE;
case ISD::FMINNUM_IEEE:
return ISD::FMAXNUM_IEEE;
case ISD::FMAXIMUM:
return ISD::FMINIMUM;
case ISD::FMINIMUM:
return ISD::FMAXIMUM;
case AMDGPUISD::FMAX_LEGACY:
return AMDGPUISD::FMIN_LEGACY;
case AMDGPUISD::FMIN_LEGACY:
return AMDGPUISD::FMAX_LEGACY;
default:
llvm_unreachable("invalid min/max opcode");
}
}
/// \return true if it's profitable to try to push an fneg into its source
/// instruction.
bool AMDGPUTargetLowering::shouldFoldFNegIntoSrc(SDNode *N, SDValue N0) {
// If the input has multiple uses and we can either fold the negate down, or
// the other uses cannot, give up. This both prevents unprofitable
// transformations and infinite loops: we won't repeatedly try to fold around
// a negate that has no 'good' form.
if (N0.hasOneUse()) {
// This may be able to fold into the source, but at a code size cost. Don't
// fold if the fold into the user is free.
if (allUsesHaveSourceMods(N, 0))
return false;
} else {
if (fnegFoldsIntoOp(N0.getNode()) &&
(allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode())))
return false;
}
return true;
}
SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
unsigned Opc = N0.getOpcode();
if (!shouldFoldFNegIntoSrc(N, N0))
return SDValue();
SDLoc SL(N);
switch (Opc) {
case ISD::FADD: {
if (!mayIgnoreSignedZero(N0))
return SDValue();
// (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
SDValue LHS = N0.getOperand(0);
SDValue RHS = N0.getOperand(1);
if (LHS.getOpcode() != ISD::FNEG)
LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
else
LHS = LHS.getOperand(0);
if (RHS.getOpcode() != ISD::FNEG)
RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
else
RHS = RHS.getOperand(0);
SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags());
if (Res.getOpcode() != ISD::FADD)
return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
}
case ISD::FMUL:
case AMDGPUISD::FMUL_LEGACY: {
// (fneg (fmul x, y)) -> (fmul x, (fneg y))
// (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y))
SDValue LHS = N0.getOperand(0);
SDValue RHS = N0.getOperand(1);
if (LHS.getOpcode() == ISD::FNEG)
LHS = LHS.getOperand(0);
else if (RHS.getOpcode() == ISD::FNEG)
RHS = RHS.getOperand(0);
else
RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags());
if (Res.getOpcode() != Opc)
return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
}
case ISD::FMA:
case ISD::FMAD: {
// TODO: handle llvm.amdgcn.fma.legacy
if (!mayIgnoreSignedZero(N0))
return SDValue();
// (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
SDValue LHS = N0.getOperand(0);
SDValue MHS = N0.getOperand(1);
SDValue RHS = N0.getOperand(2);
if (LHS.getOpcode() == ISD::FNEG)
LHS = LHS.getOperand(0);
else if (MHS.getOpcode() == ISD::FNEG)
MHS = MHS.getOperand(0);
else
MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS);
if (RHS.getOpcode() != ISD::FNEG)
RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
else
RHS = RHS.getOperand(0);
SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);
if (Res.getOpcode() != Opc)
return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
}
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMAXNUM_IEEE:
case ISD::FMINNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMIN_LEGACY: {
// fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
// fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)
// fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y)
// fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y)
SDValue LHS = N0.getOperand(0);
SDValue RHS = N0.getOperand(1);
// 0 doesn't have a negated inline immediate.
// TODO: This constant check should be generalized to other operations.
if (isConstantCostlierToNegate(RHS))
return SDValue();
SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
unsigned Opposite = inverseMinMax(Opc);
SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
if (Res.getOpcode() != Opposite)
return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
}
case AMDGPUISD::FMED3: {
SDValue Ops[3];
for (unsigned I = 0; I < 3; ++I)
Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
if (Res.getOpcode() != AMDGPUISD::FMED3)
return SDValue(); // Op got folded away.
if (!N0.hasOneUse()) {
SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res);
DAG.ReplaceAllUsesWith(N0, Neg);
for (SDNode *U : Neg->uses())
DCI.AddToWorklist(U);
}
return Res;
}
case ISD::FP_EXTEND:
case ISD::FTRUNC:
case ISD::FRINT:
case ISD::FNEARBYINT: // XXX - Should fround be handled?
case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FCANONICALIZE:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::SIN_HW: {
SDValue CvtSrc = N0.getOperand(0);
if (CvtSrc.getOpcode() == ISD::FNEG) {
// (fneg (fp_extend (fneg x))) -> (fp_extend x)
// (fneg (rcp (fneg x))) -> (rcp x)
return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0));
}
if (!N0.hasOneUse())
return SDValue();
// (fneg (fp_extend x)) -> (fp_extend (fneg x))
// (fneg (rcp x)) -> (rcp (fneg x))
SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags());
}
case ISD::FP_ROUND: {
SDValue CvtSrc = N0.getOperand(0);
if (CvtSrc.getOpcode() == ISD::FNEG) {
// (fneg (fp_round (fneg x))) -> (fp_round x)
return DAG.getNode(ISD::FP_ROUND, SL, VT,
CvtSrc.getOperand(0), N0.getOperand(1));
}
if (!N0.hasOneUse())
return SDValue();
// (fneg (fp_round x)) -> (fp_round (fneg x))
SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1));
}
case ISD::FP16_TO_FP: {
// v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal
// f16, but legalization of f16 fneg ends up pulling it out of the source.
// Put the fneg back as a legal source operation that can be matched later.
SDLoc SL(N);
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
// fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000)
SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src,
DAG.getConstant(0x8000, SL, SrcVT));
return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
}
case ISD::SELECT: {
// fneg (select c, a, b) -> select c, (fneg a), (fneg b)
// TODO: Invert conditions of foldFreeOpFromSelect
return SDValue();
}
case ISD::BITCAST: {
SDLoc SL(N);
SDValue BCSrc = N0.getOperand(0);
if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) {
SDValue HighBits = BCSrc.getOperand(BCSrc.getNumOperands() - 1);
if (HighBits.getValueType().getSizeInBits() != 32 ||
!fnegFoldsIntoOp(HighBits.getNode()))
return SDValue();
// f64 fneg only really needs to operate on the high half of the
// register, so try to force it to an f32 operation to help make use of
// source modifiers.
//
// fneg (f64 (bitcast (build_vector x, y))) ->
// f64 (bitcast (build_vector (bitcast i32:x to f32),
// (fneg (bitcast i32:y to f32)))
SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, MVT::f32, HighBits);
SDValue NegHi = DAG.getNode(ISD::FNEG, SL, MVT::f32, CastHi);
SDValue CastBack =
DAG.getNode(ISD::BITCAST, SL, HighBits.getValueType(), NegHi);
SmallVector<SDValue, 8> Ops(BCSrc->op_begin(), BCSrc->op_end());
Ops.back() = CastBack;
DCI.AddToWorklist(NegHi.getNode());
SDValue Build =
DAG.getNode(ISD::BUILD_VECTOR, SL, BCSrc.getValueType(), Ops);
SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, Build);
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Result));
return Result;
}
if (BCSrc.getOpcode() == ISD::SELECT && VT == MVT::f32 &&
BCSrc.hasOneUse()) {
// fneg (bitcast (f32 (select cond, i32:lhs, i32:rhs))) ->
// select cond, (bitcast i32:lhs to f32), (bitcast i32:rhs to f32)
// TODO: Cast back result for multiple uses is beneficial in some cases.
SDValue LHS =
DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(1));
SDValue RHS =
DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(2));
SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, LHS);
SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, RHS);
return DAG.getNode(ISD::SELECT, SL, MVT::f32, BCSrc.getOperand(0), NegLHS,
NegRHS);
}
return SDValue();
}
default:
return SDValue();
}
}
SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
if (!N0.hasOneUse())
return SDValue();
switch (N0.getOpcode()) {
case ISD::FP16_TO_FP: {
assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");
SDLoc SL(N);
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
// fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff)
SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src,
DAG.getConstant(0x7fff, SL, SrcVT));
return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);
}
default:
return SDValue();
}
}
SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
if (!CFP)
return SDValue();
// XXX - Should this flush denormals?
const APFloat &Val = CFP->getValueAPF();
APFloat One(Val.getSemantics(), "1.0");
return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
}
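// [Editorial note] With a constant operand this folds the reciprocal at
// compile time, e.g. (rcp 4.0) -> 0.25. The division above uses APFloat's
// IEEE round-to-nearest-even semantics and, per the XXX above, does not
// flush denormal results.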
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
switch(N->getOpcode()) {
default:
break;
case ISD::BITCAST: {
EVT DestVT = N->getValueType(0);
// Push casts through vector builds. This helps avoid emitting a large
// number of copies when materializing floating point vector constants.
//
// vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) =>
// vNt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y))
if (DestVT.isVector()) {
SDValue Src = N->getOperand(0);
if (Src.getOpcode() == ISD::BUILD_VECTOR &&
(DCI.getDAGCombineLevel() < AfterLegalizeDAG ||
isOperationLegal(ISD::BUILD_VECTOR, DestVT))) {
EVT SrcVT = Src.getValueType();
unsigned NElts = DestVT.getVectorNumElements();
if (SrcVT.getVectorNumElements() == NElts) {
EVT DestEltVT = DestVT.getVectorElementType();
SmallVector<SDValue, 8> CastedElts;
SDLoc SL(N);
for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) {
SDValue Elt = Src.getOperand(I);
CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt));
}
return DAG.getBuildVector(DestVT, SL, CastedElts);
}
}
}
if (DestVT.getSizeInBits() != 64 || !DestVT.isVector())
break;
// Fold bitcasts of constants.
//
// v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k)
// TODO: Generalize and move to DAGCombiner
SDValue Src = N->getOperand(0);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
SDLoc SL(N);
uint64_t CVal = C->getZExtValue();
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
}
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
const APInt &Val = C->getValueAPF().bitcastToAPInt();
SDLoc SL(N);
uint64_t CVal = Val.getZExtValue();
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec);
}
break;
}
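// [Editorial example] For instance, (v2i32 (bitcast (i64 0x1122334455667788)))
// becomes (build_vector (i32 0x55667788), (i32 0x11223344)), with the low
// half of the constant in element 0 as built above.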
case ISD::SHL: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
break;
return performShlCombine(N, DCI);
}
case ISD::SRL: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
break;
return performSrlCombine(N, DCI);
}
case ISD::SRA: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
break;
return performSraCombine(N, DCI);
}
case ISD::TRUNCATE:
return performTruncateCombine(N, DCI);
case ISD::MUL:
return performMulCombine(N, DCI);
case AMDGPUISD::MUL_U24:
case AMDGPUISD::MUL_I24: {
if (SDValue Simplified = simplifyMul24(N, DCI))
return Simplified;
break;
}
case AMDGPUISD::MULHI_I24:
case AMDGPUISD::MULHI_U24:
return simplifyMul24(N, DCI);
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
return performMulLoHiCombine(N, DCI);
case ISD::MULHS:
return performMulhsCombine(N, DCI);
case ISD::MULHU:
return performMulhuCombine(N, DCI);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case ISD::FNEG:
return performFNegCombine(N, DCI);
case ISD::FABS:
return performFAbsCombine(N, DCI);
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
assert(!N->getValueType(0).isVector() &&
"Vector handling of BFE not implemented");
ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
if (!Width)
break;
uint32_t WidthVal = Width->getZExtValue() & 0x1f;
if (WidthVal == 0)
return DAG.getConstant(0, DL, MVT::i32);
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Offset)
break;
SDValue BitsFrom = N->getOperand(0);
uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
if (OffsetVal == 0) {
// This is already sign / zero extended, so try to fold away extra BFEs.
unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
if (OpSignBits >= SignBits)
return BitsFrom;
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
if (Signed) {
// This is a sign_extend_inreg. Replace it to take advantage of existing
// DAG Combines. If not eliminated, we will match back to BFE during
// selection.
// TODO: A sext_inreg of an extended type could also be handled in a
// single BFE.
return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
DAG.getValueType(SmallVT));
}
return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
}
if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
if (Signed) {
return constantFoldBFE<int32_t>(DAG,
CVal->getSExtValue(),
OffsetVal,
WidthVal,
DL);
}
return constantFoldBFE<uint32_t>(DAG,
CVal->getZExtValue(),
OffsetVal,
WidthVal,
DL);
}
if ((OffsetVal + WidthVal) >= 32 &&
!(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) {
SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
BitsFrom, ShiftVal);
}
if (BitsFrom.hasOneUse()) {
APInt Demanded = APInt::getBitsSet(32,
OffsetVal,
OffsetVal + WidthVal);
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) ||
TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
}
}
break;
}
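// [Editorial sketch, not upstream code] A scalar model of the BFE semantics
// assumed by the folds above, for 0 < width < 32 and offset + width <= 32:
//   bfe_u32(x, off, w) = (x >> off) & ((1u << w) - 1)
//   bfe_i32(x, off, w) = (int32_t)(x << (32 - off - w)) >> (32 - w)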
case ISD::LOAD:
return performLoadCombine(N, DCI);
case ISD::STORE:
return performStoreCombine(N, DCI);
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_IFLAG:
return performRcpCombine(N, DCI);
case ISD::AssertZext:
case ISD::AssertSext:
return performAssertSZExtCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicWOChainCombine(N, DCI);
case AMDGPUISD::FMAD_FTZ: {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
// FMAD_FTZ is an FMAD that flushes denormals to zero.
// We flush the inputs, the intermediate step, and the output.
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
if (N0CFP && N1CFP && N2CFP) {
const auto FTZ = [](const APFloat &V) {
if (V.isDenormal()) {
APFloat Zero(V.getSemantics(), 0);
return V.isNegative() ? -Zero : Zero;
}
return V;
};
APFloat V0 = FTZ(N0CFP->getValueAPF());
APFloat V1 = FTZ(N1CFP->getValueAPF());
APFloat V2 = FTZ(N2CFP->getValueAPF());
V0.multiply(V1, APFloat::rmNearestTiesToEven);
V0 = FTZ(V0);
V0.add(V2, APFloat::rmNearestTiesToEven);
return DAG.getConstantFP(FTZ(V0), DL, VT);
}
break;
}
}
return SDValue();
}
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
Register Reg, EVT VT,
const SDLoc &SL,
bool RawReg) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
Register VReg;
if (!MRI.isLiveIn(Reg)) {
VReg = MRI.createVirtualRegister(RC);
MRI.addLiveIn(Reg, VReg);
} else {
VReg = MRI.getLiveInVirtReg(Reg);
}
if (RawReg)
return DAG.getRegister(VReg, VT);
return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT);
}
// This may be called multiple times, and nothing prevents creating multiple
// objects at the same offset. See if we already defined this object.
static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size,
int64_t Offset) {
for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
if (MFI.getObjectOffset(I) == Offset) {
assert(MFI.getObjectSize(I) == Size);
return I;
}
}
return MFI.CreateFixedObject(Size, Offset, true);
}
SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
EVT VT,
const SDLoc &SL,
int64_t Offset) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset);
auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset);
SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32);
return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, Align(4),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
}
SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
SDValue ArgVal,
int64_t Offset) const {
MachineFunction &MF = DAG.getMachineFunction();
MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
// Stores to the argument stack area are relative to the stack pointer.
SDValue SP =
DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32);
Ptr = DAG.getNode(ISD::ADD, SL, MVT::i32, SP, Ptr);
SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4),
MachineMemOperand::MODereferenceable);
return Store;
}
SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
const TargetRegisterClass *RC,
EVT VT, const SDLoc &SL,
const ArgDescriptor &Arg) const {
assert(Arg && "Attempting to load missing argument");
SDValue V = Arg.isRegister() ?
CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL) :
loadStackInputValue(DAG, VT, SL, Arg.getStackOffset());
if (!Arg.isMasked())
return V;
unsigned Mask = Arg.getMask();
unsigned Shift = llvm::countr_zero<unsigned>(Mask);
V = DAG.getNode(ISD::SRL, SL, VT, V,
DAG.getShiftAmountConstant(Shift, VT, SL));
return DAG.getNode(ISD::AND, SL, VT, V,
DAG.getConstant(Mask >> Shift, SL, VT));
}
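// [Editorial example, hypothetical mask] For a masked argument with
// Mask = 0x3ff0, Shift is countr_zero(0x3ff0) = 4, so the packed value
// decodes as (V >> 4) & 0x3ff, extracting the 10-bit field at bit 4.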
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
uint64_t ExplicitKernArgSize, const ImplicitParameter Param) const {
unsigned ExplicitArgOffset = Subtarget->getExplicitKernelArgOffset();
const Align Alignment = Subtarget->getAlignmentForImplicitArgPtr();
uint64_t ArgOffset =
alignTo(ExplicitKernArgSize, Alignment) + ExplicitArgOffset;
switch (Param) {
case FIRST_IMPLICIT:
return ArgOffset;
case PRIVATE_BASE:
return ArgOffset + AMDGPU::ImplicitArg::PRIVATE_BASE_OFFSET;
case SHARED_BASE:
return ArgOffset + AMDGPU::ImplicitArg::SHARED_BASE_OFFSET;
case QUEUE_PTR:
return ArgOffset + AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET;
}
llvm_unreachable("unexpected implicit parameter type");
}
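// [Editorial example, illustrative numbers] With ExplicitKernArgSize = 36
// and an implicit-arg alignment of 8, the implicit area starts at
// alignTo(36, 8) + ExplicitArgOffset = 40 + ExplicitArgOffset; each named
// parameter is that base plus its fixed *_OFFSET constant.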
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
const MachineFunction &MF, const ImplicitParameter Param) const {
const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
return getImplicitParameterOffset(MFI->getExplicitKernArgSize(), Param);
}
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((AMDGPUISD::NodeType)Opcode) {
case AMDGPUISD::FIRST_NUMBER: break;
// AMDIL DAG nodes
NODE_NAME_CASE(UMUL);
NODE_NAME_CASE(BRANCH_COND);
// AMDGPU DAG nodes
NODE_NAME_CASE(IF)
NODE_NAME_CASE(ELSE)
NODE_NAME_CASE(LOOP)
NODE_NAME_CASE(CALL)
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(TC_RETURN_GFX)
NODE_NAME_CASE(TC_RETURN_CHAIN)
NODE_NAME_CASE(TRAP)
NODE_NAME_CASE(RET_GLUE)
NODE_NAME_CASE(WAVE_ADDRESS)
NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
NODE_NAME_CASE(ENDPGM_TRAP)
NODE_NAME_CASE(SIMULATED_TRAP)
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(SETCC)
NODE_NAME_CASE(SETREG)
NODE_NAME_CASE(DENORM_MODE)
NODE_NAME_CASE(FMA_W_CHAIN)
NODE_NAME_CASE(FMUL_W_CHAIN)
NODE_NAME_CASE(CLAMP)
NODE_NAME_CASE(COS_HW)
NODE_NAME_CASE(SIN_HW)
NODE_NAME_CASE(FMAX_LEGACY)
NODE_NAME_CASE(FMIN_LEGACY)
NODE_NAME_CASE(FMAX3)
NODE_NAME_CASE(SMAX3)
NODE_NAME_CASE(UMAX3)
NODE_NAME_CASE(FMIN3)
NODE_NAME_CASE(SMIN3)
NODE_NAME_CASE(UMIN3)
NODE_NAME_CASE(FMED3)
NODE_NAME_CASE(SMED3)
NODE_NAME_CASE(UMED3)
NODE_NAME_CASE(FMAXIMUM3)
NODE_NAME_CASE(FMINIMUM3)
NODE_NAME_CASE(FDOT2)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(DIV_SCALE)
NODE_NAME_CASE(DIV_FMAS)
NODE_NAME_CASE(DIV_FIXUP)
NODE_NAME_CASE(FMAD_FTZ)
NODE_NAME_CASE(RCP)
NODE_NAME_CASE(RSQ)
NODE_NAME_CASE(RCP_LEGACY)
NODE_NAME_CASE(RCP_IFLAG)
NODE_NAME_CASE(LOG)
NODE_NAME_CASE(EXP)
NODE_NAME_CASE(FMUL_LEGACY)
NODE_NAME_CASE(RSQ_CLAMP)
NODE_NAME_CASE(FP_CLASS)
NODE_NAME_CASE(DOT4)
NODE_NAME_CASE(CARRY)
NODE_NAME_CASE(BORROW)
NODE_NAME_CASE(BFE_U32)
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
NODE_NAME_CASE(BFM)
NODE_NAME_CASE(FFBH_U32)
NODE_NAME_CASE(FFBH_I32)
NODE_NAME_CASE(FFBL_B32)
NODE_NAME_CASE(MUL_U24)
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MULHI_U24)
NODE_NAME_CASE(MULHI_I24)
NODE_NAME_CASE(MAD_U24)
NODE_NAME_CASE(MAD_I24)
NODE_NAME_CASE(MAD_I64_I32)
NODE_NAME_CASE(MAD_U64_U32)
NODE_NAME_CASE(PERM)
NODE_NAME_CASE(TEXTURE_FETCH)
NODE_NAME_CASE(R600_EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
NODE_NAME_CASE(REGISTER_STORE)
NODE_NAME_CASE(SAMPLE)
NODE_NAME_CASE(SAMPLEB)
NODE_NAME_CASE(SAMPLED)
NODE_NAME_CASE(SAMPLEL)
NODE_NAME_CASE(CVT_F32_UBYTE0)
NODE_NAME_CASE(CVT_F32_UBYTE1)
NODE_NAME_CASE(CVT_F32_UBYTE2)
NODE_NAME_CASE(CVT_F32_UBYTE3)
NODE_NAME_CASE(CVT_PKRTZ_F16_F32)
NODE_NAME_CASE(CVT_PKNORM_I16_F32)
NODE_NAME_CASE(CVT_PKNORM_U16_F32)
NODE_NAME_CASE(CVT_PK_I16_I32)
NODE_NAME_CASE(CVT_PK_U16_U32)
NODE_NAME_CASE(FP_TO_FP16)
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
NODE_NAME_CASE(LDS)
NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD)
NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(LOAD_D16_HI)
NODE_NAME_CASE(LOAD_D16_LO)
NODE_NAME_CASE(LOAD_D16_HI_I8)
NODE_NAME_CASE(LOAD_D16_HI_U8)
NODE_NAME_CASE(LOAD_D16_LO_I8)
NODE_NAME_CASE(LOAD_D16_LO_U8)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(LOAD_CONSTANT)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
NODE_NAME_CASE(DS_ORDERED_COUNT)
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
NODE_NAME_CASE(BUFFER_LOAD)
NODE_NAME_CASE(BUFFER_LOAD_UBYTE)
NODE_NAME_CASE(BUFFER_LOAD_USHORT)
NODE_NAME_CASE(BUFFER_LOAD_BYTE)
NODE_NAME_CASE(BUFFER_LOAD_SHORT)
NODE_NAME_CASE(BUFFER_LOAD_TFE)
NODE_NAME_CASE(BUFFER_LOAD_UBYTE_TFE)
NODE_NAME_CASE(BUFFER_LOAD_USHORT_TFE)
NODE_NAME_CASE(BUFFER_LOAD_BYTE_TFE)
NODE_NAME_CASE(BUFFER_LOAD_SHORT_TFE)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT_TFE)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
NODE_NAME_CASE(SBUFFER_LOAD)
NODE_NAME_CASE(SBUFFER_LOAD_BYTE)
NODE_NAME_CASE(SBUFFER_LOAD_UBYTE)
NODE_NAME_CASE(SBUFFER_LOAD_SHORT)
NODE_NAME_CASE(SBUFFER_LOAD_USHORT)
NODE_NAME_CASE(BUFFER_STORE)
NODE_NAME_CASE(BUFFER_STORE_BYTE)
NODE_NAME_CASE(BUFFER_STORE_SHORT)
NODE_NAME_CASE(BUFFER_STORE_FORMAT)
NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
NODE_NAME_CASE(BUFFER_ATOMIC_SWAP)
NODE_NAME_CASE(BUFFER_ATOMIC_ADD)
NODE_NAME_CASE(BUFFER_ATOMIC_SUB)
NODE_NAME_CASE(BUFFER_ATOMIC_SMIN)
NODE_NAME_CASE(BUFFER_ATOMIC_UMIN)
NODE_NAME_CASE(BUFFER_ATOMIC_SMAX)
NODE_NAME_CASE(BUFFER_ATOMIC_UMAX)
NODE_NAME_CASE(BUFFER_ATOMIC_AND)
NODE_NAME_CASE(BUFFER_ATOMIC_OR)
NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
NODE_NAME_CASE(BUFFER_ATOMIC_INC)
NODE_NAME_CASE(BUFFER_ATOMIC_DEC)
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
NODE_NAME_CASE(BUFFER_ATOMIC_CSUB)
NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
NODE_NAME_CASE(BUFFER_ATOMIC_FMIN)
NODE_NAME_CASE(BUFFER_ATOMIC_FMAX)
NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32)
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}
return nullptr;
}
SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const {
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
RefinementSteps = 0;
return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
}
// TODO: There is also an f64 rsq instruction, but the documentation is less
// clear on its precision.
return SDValue();
}
SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const {
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
// Reciprocal, < 1 ulp error.
//
// This reciprocal approximation converges to < 0.5 ulp error with one
// Newton-Raphson step performed with two fused multiply-adds (FMAs).
RefinementSteps = 0;
return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
}
// TODO: There is also an f64 rcp instruction, but the documentation is less
// clear on its precision.
return SDValue();
}
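// [Editorial note] The two-FMA Newton-Raphson step referenced above refines
// an estimate x0 of 1/d as:
//   e  = fma(-d, x0, 1.0);  // residual 1 - d*x0
//   x1 = fma(x0, e, x0);    // x0 + x0*e, roughly doubling the correct bits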
static unsigned workitemIntrinsicDim(unsigned ID) {
switch (ID) {
case Intrinsic::amdgcn_workitem_id_x:
return 0;
case Intrinsic::amdgcn_workitem_id_y:
return 1;
case Intrinsic::amdgcn_workitem_id_z:
return 2;
default:
llvm_unreachable("not a workitem intrinsic");
}
}
void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
Known.resetAll(); // Don't know anything.
unsigned Opc = Op.getOpcode();
switch (Opc) {
default:
break;
case AMDGPUISD::CARRY:
case AMDGPUISD::BORROW: {
Known.Zero = APInt::getHighBitsSet(32, 31);
break;
}
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
if (!CWidth)
return;
uint32_t Width = CWidth->getZExtValue() & 0x1f;
if (Opc == AMDGPUISD::BFE_U32)
Known.Zero = APInt::getHighBitsSet(32, 32 - Width);
break;
}
case AMDGPUISD::FP_TO_FP16: {
unsigned BitWidth = Known.getBitWidth();
// High bits are zero.
Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
break;
}
case AMDGPUISD::MUL_U24:
case AMDGPUISD::MUL_I24: {
KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
unsigned TrailZ = LHSKnown.countMinTrailingZeros() +
RHSKnown.countMinTrailingZeros();
Known.Zero.setLowBits(std::min(TrailZ, 32u));
// Skip the extra check if all bits are known to be zero.
if (TrailZ >= 32)
break;
// Truncate to 24 bits.
LHSKnown = LHSKnown.trunc(24);
RHSKnown = RHSKnown.trunc(24);
if (Opc == AMDGPUISD::MUL_I24) {
unsigned LHSValBits = LHSKnown.countMaxSignificantBits();
unsigned RHSValBits = RHSKnown.countMaxSignificantBits();
unsigned MaxValBits = LHSValBits + RHSValBits;
if (MaxValBits > 32)
break;
unsigned SignBits = 32 - MaxValBits + 1;
bool LHSNegative = LHSKnown.isNegative();
bool LHSNonNegative = LHSKnown.isNonNegative();
bool LHSPositive = LHSKnown.isStrictlyPositive();
bool RHSNegative = RHSKnown.isNegative();
bool RHSNonNegative = RHSKnown.isNonNegative();
bool RHSPositive = RHSKnown.isStrictlyPositive();
if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))
Known.Zero.setHighBits(SignBits);
else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))
Known.One.setHighBits(SignBits);
} else {
unsigned LHSValBits = LHSKnown.countMaxActiveBits();
unsigned RHSValBits = RHSKnown.countMaxActiveBits();
unsigned MaxValBits = LHSValBits + RHSValBits;
if (MaxValBits >= 32)
break;
Known.Zero.setBitsFrom(MaxValBits);
}
break;
}
case AMDGPUISD::PERM: {
ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Op.getOperand(2));
if (!CMask)
return;
KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
unsigned Sel = CMask->getZExtValue();
for (unsigned I = 0; I < 32; I += 8) {
unsigned SelBits = Sel & 0xff;
if (SelBits < 4) {
SelBits *= 8;
Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
} else if (SelBits < 7) {
SelBits = (SelBits & 3) * 8;
Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
} else if (SelBits == 0x0c) {
Known.Zero |= 0xFFull << I;
} else if (SelBits > 0x0c) {
Known.One |= 0xFFull << I;
}
Sel >>= 8;
}
break;
}
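// [Editorial summary] The known-bits model above mirrors the PERM selector
// encoding: each selector byte 0-3 copies a byte of the second operand, 4-6
// copies a byte of the first, 0x0c yields a constant 0x00 byte, and values
// above 0x0c yield a constant 0xff byte; other selectors stay unknown.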
case AMDGPUISD::BUFFER_LOAD_UBYTE: {
Known.Zero.setHighBits(24);
break;
}
case AMDGPUISD::BUFFER_LOAD_USHORT: {
Known.Zero.setHighBits(16);
break;
}
case AMDGPUISD::LDS: {
auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode());
Align Alignment = GA->getGlobal()->getPointerAlignment(DAG.getDataLayout());
Known.Zero.setHighBits(16);
Known.Zero.setLowBits(Log2(Alignment));
break;
}
case AMDGPUISD::SMIN3:
case AMDGPUISD::SMAX3:
case AMDGPUISD::SMED3:
case AMDGPUISD::UMIN3:
case AMDGPUISD::UMAX3:
case AMDGPUISD::UMED3: {
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
if (Known2.isUnknown())
break;
KnownBits Known1 = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
if (Known1.isUnknown())
break;
KnownBits Known0 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
if (Known0.isUnknown())
break;
// TODO: Handle LeadZero/LeadOne from UMIN/UMAX handling.
Known.Zero = Known0.Zero & Known1.Zero & Known2.Zero;
Known.One = Known0.One & Known1.One & Known2.One;
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = Op.getConstantOperandVal(0);
switch (IID) {
case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::amdgcn_workitem_id_z: {
unsigned MaxValue = Subtarget->getMaxWorkitemID(
DAG.getMachineFunction().getFunction(), workitemIntrinsicDim(IID));
Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
break;
}
default:
break;
}
}
}
}
unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
switch (Op.getOpcode()) {
case AMDGPUISD::BFE_I32: {
ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
if (!Width)
return 1;
unsigned SignBits = 32 - Width->getZExtValue() + 1;
if (!isNullConstant(Op.getOperand(1)))
return SignBits;
// TODO: Could probably figure something out with non-0 offsets.
unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
return std::max(SignBits, Op0SignBits);
}
case AMDGPUISD::BFE_U32: {
ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
}
case AMDGPUISD::CARRY:
case AMDGPUISD::BORROW:
return 31;
case AMDGPUISD::BUFFER_LOAD_BYTE:
return 25;
case AMDGPUISD::BUFFER_LOAD_SHORT:
return 17;
case AMDGPUISD::BUFFER_LOAD_UBYTE:
return 24;
case AMDGPUISD::BUFFER_LOAD_USHORT:
return 16;
case AMDGPUISD::FP_TO_FP16:
return 16;
case AMDGPUISD::SMIN3:
case AMDGPUISD::SMAX3:
case AMDGPUISD::SMED3:
case AMDGPUISD::UMIN3:
case AMDGPUISD::UMAX3:
case AMDGPUISD::UMED3: {
unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(2), Depth + 1);
if (Tmp2 == 1)
return 1; // Early out.
unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
if (Tmp1 == 1)
return 1; // Early out.
unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
if (Tmp0 == 1)
return 1; // Early out.
return std::min({Tmp0, Tmp1, Tmp2});
}
default:
return 1;
}
}
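// [Editorial note] The buffer-load constants above follow from extension
// width: a sign-extended i8 in an i32 has 32 - 8 + 1 = 25 copies of the sign
// bit (17 for i16), while zero-extended i8/i16 loads pin the top 24/16 bits
// to zero, giving 24 and 16 known sign bits respectively.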
unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
GISelKnownBits &Analysis, Register R,
const APInt &DemandedElts, const MachineRegisterInfo &MRI,
unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
if (!MI)
return 1;
// TODO: Check range metadata on MMO.
switch (MI->getOpcode()) {
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
return 25;
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
return 17;
case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
return 24;
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
return 16;
case AMDGPU::G_AMDGPU_SMED3:
case AMDGPU::G_AMDGPU_UMED3: {
auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
unsigned Tmp2 = Analysis.computeNumSignBits(Src2, DemandedElts, Depth + 1);
if (Tmp2 == 1)
return 1;
unsigned Tmp1 = Analysis.computeNumSignBits(Src1, DemandedElts, Depth + 1);
if (Tmp1 == 1)
return 1;
unsigned Tmp0 = Analysis.computeNumSignBits(Src0, DemandedElts, Depth + 1);
if (Tmp0 == 1)
return 1;
return std::min({Tmp0, Tmp1, Tmp2});
}
default:
return 1;
}
}
bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY: {
if (SNaN)
return true;
// TODO: Can check no nans on one of the operands for each one, but which
// one?
return false;
}
case AMDGPUISD::FMUL_LEGACY:
case AMDGPUISD::CVT_PKRTZ_F16_F32: {
if (SNaN)
return true;
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
}
case AMDGPUISD::FMED3:
case AMDGPUISD::FMIN3:
case AMDGPUISD::FMAX3:
case AMDGPUISD::FMINIMUM3:
case AMDGPUISD::FMAXIMUM3:
case AMDGPUISD::FMAD_FTZ: {
if (SNaN)
return true;
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
}
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
case AMDGPUISD::CVT_F32_UBYTE2:
case AMDGPUISD::CVT_F32_UBYTE3:
return true;
case AMDGPUISD::RCP:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RSQ_CLAMP: {
if (SNaN)
return true;
// TODO: Need an is-known-positive check.
return false;
}
case ISD::FLDEXP:
case AMDGPUISD::FRACT: {
if (SNaN)
return true;
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
case AMDGPUISD::DIV_SCALE:
case AMDGPUISD::DIV_FMAS:
case AMDGPUISD::DIV_FIXUP:
// TODO: Refine on operands.
return SNaN;
case AMDGPUISD::SIN_HW:
case AMDGPUISD::COS_HW: {
// TODO: Need check for infinity
return SNaN;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID = Op.getConstantOperandVal(0);
// TODO: Handle more intrinsics
switch (IntrinsicID) {
case Intrinsic::amdgcn_cubeid:
return true;
case Intrinsic::amdgcn_frexp_mant: {
if (SNaN)
return true;
return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
}
case Intrinsic::amdgcn_cvt_pkrtz: {
if (SNaN)
return true;
return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
}
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rsq:
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_rsq_clamp: {
if (SNaN)
return true;
// TODO: Need an is-known-positive check.
return false;
}
case Intrinsic::amdgcn_trig_preop:
case Intrinsic::amdgcn_fdot2:
// TODO: Refine on operand
return SNaN;
case Intrinsic::amdgcn_fma_legacy:
if (SNaN)
return true;
return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1) &&
DAG.isKnownNeverNaN(Op.getOperand(3), SNaN, Depth + 1);
default:
return false;
}
}
default:
return false;
}
}
bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const {
return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
}
TargetLowering::AtomicExpansionKind
AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
switch (RMW->getOperation()) {
case AtomicRMWInst::Nand:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
case AtomicRMWInst::FMax:
case AtomicRMWInst::FMin:
return AtomicExpansionKind::CmpXChg;
case AtomicRMWInst::Xchg: {
const DataLayout &DL = RMW->getFunction()->getDataLayout();
unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
if (ValSize == 32 || ValSize == 64)
return AtomicExpansionKind::None;
return AtomicExpansionKind::CmpXChg;
}
default: {
if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
unsigned Size = IntTy->getBitWidth();
if (Size == 32 || Size == 64)
return AtomicExpansionKind::None;
}
return AtomicExpansionKind::CmpXChg;
}
}
}
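// [Editorial sketch, not upstream code] AtomicExpansionKind::CmpXChg rewrites
// the RMW into a compare-exchange retry loop, conceptually:
//   Old = load Ptr;
//   do { New = op(Old, Val); } while (!cmpxchg(Ptr, Old /*updated*/, New));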
/// Whether it is profitable to sink the operands of an
/// Instruction I to the basic block of I.
/// This helps use source modifiers (like abs and neg) more often.
bool AMDGPUTargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
using namespace PatternMatch;
for (auto &Op : I->operands()) {
// Ensure we are not already sinking this operand.
if (any_of(Ops, [&](Use *U) { return U->get() == Op.get(); }))
continue;
if (match(&Op, m_FAbs(m_Value())) || match(&Op, m_FNeg(m_Value())))
Ops.push_back(&Op);
}
return !Ops.empty();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 21cf4d9eeac1..758de9d732fa 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1,18860 +1,18861 @@
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <optional>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "ppc-lowering"
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
cl::desc("use absolute jump tables on ppc"), cl::Hidden);
static cl::opt<bool>
DisablePerfectShuffle("ppc-disable-perfect-shuffle",
cl::desc("disable vector permute decomposition"),
cl::init(true), cl::Hidden);
cl::opt<bool> DisableAutoPairedVecSt(
"disable-auto-paired-vec-st",
cl::desc("disable automatically generated 32byte paired vector stores"),
cl::init(true), cl::Hidden);
static cl::opt<unsigned> PPCMinimumJumpTableEntries(
"ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
cl::desc("Set minimum number of entries to use a jump table on PPC"));
static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
"ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
cl::desc("max depth when checking alias info in GatherAllAliases()"));
static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
"ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
"function to use initial-exec"));
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
"Number of shuffles lowered to a VPERM or XXPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
// A faster local-[exec|dynamic] TLS access sequence (enabled with the
// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
// variables; consistent with the IBM XL compiler, we apply a max size of
// slightly under 32KB.
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
const PPCSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
// Initialize map that relates the PPC addressing modes to the computed flags
// of a load/store instruction. The map is used to determine the optimal
// addressing mode when selecting load and stores.
initializeAddrModeMap();
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget.isPPC64();
setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
if (!useSoftFloat()) {
if (hasSPE()) {
addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
// EFPU2 APU only supports f32
if (!Subtarget.hasEFPU2())
addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
} else {
addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
}
}
// Match BITREVERSE to a customized fast code sequence in the td file.
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
// Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
// Custom lower inline assembly to check for special registers.
setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
if (Subtarget.isISA3_0()) {
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
setTruncStoreAction(MVT::f64, MVT::f16, Legal);
setTruncStoreAction(MVT::f32, MVT::f16, Legal);
} else {
// No extending loads from f16 or HW conversions back and forth.
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PowerPC has pre-inc loads and stores.
setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
if (!Subtarget.hasSPE()) {
setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
}
// PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
setOperationAction(ISD::ADDC, VT, Legal);
setOperationAction(ISD::ADDE, VT, Legal);
setOperationAction(ISD::SUBC, VT, Legal);
setOperationAction(ISD::SUBE, VT, Legal);
}
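// [Editorial example] These nodes let wide adds legalize pairwise; e.g. on
// PPC32 an i64 add expands to
//   (lo, carry) = ADDC a.lo, b.lo ; hi = ADDE a.hi, b.hi, carry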
if (Subtarget.useCRBits()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (isPPC64 || Subtarget.hasFPCVT()) {
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
} else {
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
}
// PowerPC does not support direct load/store of condition registers.
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
// FIXME: Remove this once the ANDI glue bug is fixed:
if (ANDIGlueBug)
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
setTruncStoreAction(VT, MVT::i1, Expand);
}
addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
}
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
// PPC (the libcall is not available).
setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
// We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
// PowerPC has no SREM/UREM instructions unless we are on P9.
// On P9 we may use a hardware instruction to compute the remainder.
// When the result of both the remainder and the division is required it is
// more efficient to compute the remainder from the result of the division
// rather than use the remainder instruction. The instructions are legalized
// directly because the DivRemPairsPass performs the transformation at the IR
// level.
if (Subtarget.isISA3_0()) {
setOperationAction(ISD::SREM, MVT::i32, Legal);
setOperationAction(ISD::UREM, MVT::i32, Legal);
setOperationAction(ISD::SREM, MVT::i64, Legal);
setOperationAction(ISD::UREM, MVT::i64, Legal);
} else {
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
}
// Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
// Handle constrained floating-point operations of scalar.
// TODO: Handle SPE specific operation.
setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
if (!Subtarget.hasSPE()) {
setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
}
if (Subtarget.hasVSX()) {
setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
}
if (Subtarget.hasFSQRT()) {
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
}
if (Subtarget.hasFPRND()) {
setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
}
// We don't support sin/cos/sqrt/fmod/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
// MASS transformation for LLVM intrinsics with fast-math flags, kept
// consistent with the PPCGenScalarMASSEntries pass.
if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
setOperationAction(ISD::FSIN , MVT::f64, Custom);
setOperationAction(ISD::FCOS , MVT::f64, Custom);
setOperationAction(ISD::FPOW , MVT::f64, Custom);
setOperationAction(ISD::FLOG, MVT::f64, Custom);
setOperationAction(ISD::FLOG10, MVT::f64, Custom);
setOperationAction(ISD::FEXP, MVT::f64, Custom);
setOperationAction(ISD::FSIN , MVT::f32, Custom);
setOperationAction(ISD::FCOS , MVT::f32, Custom);
setOperationAction(ISD::FPOW , MVT::f32, Custom);
setOperationAction(ISD::FLOG, MVT::f32, Custom);
setOperationAction(ISD::FLOG10, MVT::f32, Custom);
setOperationAction(ISD::FEXP, MVT::f32, Custom);
}
if (Subtarget.hasSPE()) {
setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
} else {
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FMA , MVT::f32, Legal);
}
if (Subtarget.hasSPE())
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
if (Subtarget.hasFCPSGN()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
} else {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
}
if (Subtarget.hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// Prior to P10, PowerPC does not have BSWAP, but we can use the vector
// BSWAP instruction xxbrd to speed up scalar BSWAP64.
if (Subtarget.isISA3_1()) {
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
setOperationAction(ISD::BSWAP, MVT::i64, Legal);
} else {
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(
ISD::BSWAP, MVT::i64,
(Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
}
// CTPOP and CTTZ were introduced in P8 and P9 respectively.
if (Subtarget.isISA3_0()) {
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
} else {
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
}
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
if (!Subtarget.useCRBits()) {
// PowerPC does not have Select
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Expand);
}
// PowerPC wants to turn select_cc of FP into fsel when possible.
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
if (!Subtarget.useCRBits())
setOperationAction(ISD::SETCC, MVT::i32, Custom);
if (Subtarget.hasFPU()) {
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
}
// PowerPC does not have BRCOND, which requires SetCC.
if (!Subtarget.useCRBits())
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
if (Subtarget.hasSPE()) {
// SPE has built-in conversions
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
// SPE supports signaling compare of f32/f64.
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
} else {
// PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
// PowerPC does not have [U|S]INT_TO_FP
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
}
if (Subtarget.hasDirectMove() && isPPC64) {
setOperationAction(ISD::BITCAST, MVT::f32, Legal);
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
if (TM.Options.UnsafeFPMath) {
setOperationAction(ISD::LRINT, MVT::f64, Legal);
setOperationAction(ISD::LRINT, MVT::f32, Legal);
setOperationAction(ISD::LLRINT, MVT::f64, Legal);
setOperationAction(ISD::LLRINT, MVT::f32, Legal);
setOperationAction(ISD::LROUND, MVT::f64, Legal);
setOperationAction(ISD::LROUND, MVT::f32, Legal);
setOperationAction(ISD::LLROUND, MVT::f64, Legal);
setOperationAction(ISD::LLROUND, MVT::f32, Legal);
}
} else {
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::BITCAST, MVT::f64, Expand);
}
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuations, user-level threading, and so on. As a result, no
// other SjLj exception interfaces are implemented; please don't build your
// own exception handling based on them.
// LLVM/Clang supports zero-cost DWARF exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
// TRAP is legal.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// TRAMPOLINE is custom lowered.
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
if (Subtarget.is64BitELFABI()) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i8, Promote);
AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i16, Promote);
AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
setOperationAction(ISD::VAARG, MVT::i32, Promote);
AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
} else if (Subtarget.is32BitELFABI()) {
// VAARG is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::i64, Custom);
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
// VACOPY is custom lowered with the 32-bit SVR4 ABI.
if (Subtarget.is32BitELFABI())
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
// Use the default implementation.
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
// Comparisons that require checking two conditions.
if (Subtarget.hasSPE()) {
setCondCodeAction(ISD::SETO, MVT::f32, Expand);
setCondCodeAction(ISD::SETO, MVT::f64, Expand);
setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
}
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
if (Subtarget.has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
// This is just the low 32 bits of a (signed) fp->i64 conversion.
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
}
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
if (Subtarget.hasSPE()) {
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
} else {
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
}
// With the instructions enabled under FPCVT, we can do everything.
if (Subtarget.hasFPCVT()) {
if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
}
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
}
if (Subtarget.use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// 64-bit PowerPC wants to expand i128 shifts itself.
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
} else {
// 32-bit PowerPC wants to expand i64 shifts itself.
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
// PowerPC has better expansions for funnel shifts than the generic
// TargetLowering::expandFunnelShift.
if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::FSHL, MVT::i64, Custom);
setOperationAction(ISD::FSHR, MVT::i64, Custom);
}
setOperationAction(ISD::FSHL, MVT::i32, Custom);
setOperationAction(ISD::FSHR, MVT::i32, Custom);
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
}
if (Subtarget.hasAltivec()) {
for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
// For v2i64, these are only valid with P8Vector. This is corrected after
// the loop.
if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
}
else {
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::SMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
}
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::FMINNUM, VT, Legal);
}
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
setOperationAction(ISD::CTPOP, VT, Legal);
setOperationAction(ISD::CTLZ, VT, Legal);
}
else {
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
}
// Vector instructions introduced in P9
if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
setOperationAction(ISD::CTTZ, VT, Legal);
else
setOperationAction(ISD::CTTZ, VT, Expand);
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
// We promote all non-typed operations to v4i32.
setOperationAction(ISD::AND , VT, Promote);
AddPromotedToType (ISD::AND , VT, MVT::v4i32);
setOperationAction(ISD::OR , VT, Promote);
AddPromotedToType (ISD::OR , VT, MVT::v4i32);
setOperationAction(ISD::XOR , VT, Promote);
AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
setOperationAction(ISD::LOAD , VT, Promote);
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::SELECT_CC, VT, Promote);
AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
setOperationAction(ISD::STORE, VT, Promote);
AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
// No other operations are legal.
setOperationAction(ISD::MUL , VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FLDEXP, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
if (!Subtarget.hasP8Vector()) {
setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
}
// We can custom-expand all VECTOR_SHUFFLEs to VPERM; others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
// Vector truncates to sub-word integers that fit in an Altivec/VSX register
// are cheap, so handle them before they get expanded to scalars.
setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
setOperationAction(ISD::AND , MVT::v4i32, Legal);
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
setOperationAction(ISD::SELECT, MVT::v4i32,
Subtarget.useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
// Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
// With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
if (Subtarget.hasAltivec())
for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
setOperationAction(ISD::ROTL, VT, Legal);
// With hasP8Altivec set, we can lower ISD::ROTL to vrld.
if (Subtarget.hasP8Altivec())
setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
}
if (Subtarget.hasP8Altivec())
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
else
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
if (Subtarget.isISA3_1()) {
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
setOperationAction(ISD::UREM, MVT::v2i64, Legal);
setOperationAction(ISD::SREM, MVT::v2i64, Legal);
setOperationAction(ISD::UREM, MVT::v4i32, Legal);
setOperationAction(ISD::SREM, MVT::v4i32, Legal);
setOperationAction(ISD::UREM, MVT::v1i128, Legal);
setOperationAction(ISD::SREM, MVT::v1i128, Legal);
setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
}
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
// Altivec does not contain unordered floating-point compare instructions
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
if (Subtarget.hasP8Vector()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
}
if (Subtarget.hasDirectMove() && isPPC64) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
// The nearbyint variants are not allowed to raise the inexact exception
// so we can only code-gen them with unsafe math.
if (TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
}
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
// Share the Altivec comparison restrictions.
setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::STORE, MVT::v2f64, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
if (Subtarget.hasP8Vector())
addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
if (Subtarget.hasP8Altivec()) {
setOperationAction(ISD::SHL, MVT::v2i64, Legal);
setOperationAction(ISD::SRA, MVT::v2i64, Legal);
setOperationAction(ISD::SRL, MVT::v2i64, Legal);
// 128 bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA because of the instructions available:
// VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
// doing so.
setOperationAction(ISD::SHL, MVT::v1i128, Expand);
setOperationAction(ISD::SRL, MVT::v1i128, Expand);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
}
else {
setOperationAction(ISD::SHL, MVT::v2i64, Expand);
setOperationAction(ISD::SRA, MVT::v2i64, Expand);
setOperationAction(ISD::SRL, MVT::v2i64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
// VSX v2i64 only supports non-arithmetic operations.
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
setOperationAction(ISD::SUB, MVT::v2i64, Expand);
}
if (Subtarget.isISA3_1())
setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
else
setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
setOperationAction(ISD::STORE, MVT::v2i64, Promote);
AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
// Custom handling for partial vectors of integers converted to
// floating point. We already have optimal handling for v2i32 through
// the DAG combine, so those aren't necessary.
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
setOperationAction(ISD::FABS, MVT::v2f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
// Handle constrained floating-point operations on vector types.
// The predicate is `hasVSX` because Altivec instructions do not raise
// floating-point exceptions, but VSX vector instructions do.
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
for (MVT FPT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
// Expand the SELECT to SELECT_CC
setOperationAction(ISD::SELECT, MVT::f128, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
// No implementation for these ops for PowerPC.
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FPOWI, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
}
if (Subtarget.hasP8Altivec()) {
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
}
if (Subtarget.hasP9Vector()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
// Test data class instructions store results in CR bits.
if (Subtarget.useCRBits()) {
setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
setOperationAction(ISD::IS_FPCLASS, MVT::f128, Custom);
}
// 128 bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA because of the instructions available:
// VS{RL} and VS{RL}O.
setOperationAction(ISD::SHL, MVT::v1i128, Legal);
setOperationAction(ISD::SRL, MVT::v1i128, Legal);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
setOperationAction(ISD::FADD, MVT::f128, Legal);
setOperationAction(ISD::FSUB, MVT::f128, Legal);
setOperationAction(ISD::FDIV, MVT::f128, Legal);
setOperationAction(ISD::FMUL, MVT::f128, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
setOperationAction(ISD::FMA, MVT::f128, Legal);
setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
setOperationAction(ISD::FRINT, MVT::f128, Legal);
setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
setOperationAction(ISD::FCEIL, MVT::f128, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
setOperationAction(ISD::FROUND, MVT::f128, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::BITCAST, MVT::i128, Custom);
// Handle constrained floating-point operations on fp128.
setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
} else if (Subtarget.hasVSX()) {
setOperationAction(ISD::LOAD, MVT::f128, Promote);
setOperationAction(ISD::STORE, MVT::f128, Promote);
AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
// Mark FADD/FSUB as libcalls to keep the legalizer from expanding the
// fp_to_uint and int_to_fp operations.
setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::FMUL, MVT::f128, Expand);
setOperationAction(ISD::FDIV, MVT::f128, Expand);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FABS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
// Expand the fp_extend if the target type is fp128.
setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
// Expand the fp_round if the source type is fp128.
for (MVT VT : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
}
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Expand);
// Lower the following f128 select_cc pattern:
// select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
// We need to handle f128 SELECT_CC with integer result type.
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
}
if (Subtarget.hasP9Altivec()) {
if (Subtarget.isISA3_1()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
} else {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
}
if (Subtarget.hasP10Vector()) {
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
}
}
if (Subtarget.pairedVectorMemops()) {
addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
setOperationAction(ISD::STORE, MVT::v256i1, Custom);
}
if (Subtarget.hasMMA()) {
if (Subtarget.isISAFuture())
addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
else
addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
setOperationAction(ISD::STORE, MVT::v512i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
}
if (Subtarget.has64BitSupport())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
if (Subtarget.isISA3_1())
setOperationAction(ISD::SRA, MVT::v1i128, Legal);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
if (!isPPC64) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
}
if (shouldInlineQuadwordAtomics()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
}
setBooleanContents(ZeroOrOneBooleanContent);
if (Subtarget.hasAltivec()) {
// Altivec instructions set fields to all zeros or all ones.
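// (e.g. vcmpequw writes 0xFFFFFFFF into each lane where the compare is
// true and 0 elsewhere, which is exactly ZeroOrNegativeOneBooleanContent.)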
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
}
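// Atomic operations wider than the maximum supported size are lowered to
// __atomic_* libcalls by the AtomicExpand pass, so report the widest size
// this subtarget can implement inline.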
if (shouldInlineQuadwordAtomics())
setMaxAtomicSizeInBitsSupported(128);
else if (isPPC64)
setMaxAtomicSizeInBitsSupported(64);
else
setMaxAtomicSizeInBitsSupported(32);
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
if (Subtarget.hasFPCVT())
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
if (Subtarget.useCRBits())
setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
if (Subtarget.useCRBits()) {
setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
}
setLibcallName(RTLIB::LOG_F128, "logf128");
setLibcallName(RTLIB::LOG2_F128, "log2f128");
setLibcallName(RTLIB::LOG10_F128, "log10f128");
setLibcallName(RTLIB::EXP_F128, "expf128");
setLibcallName(RTLIB::EXP2_F128, "exp2f128");
setLibcallName(RTLIB::SIN_F128, "sinf128");
setLibcallName(RTLIB::COS_F128, "cosf128");
setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
setLibcallName(RTLIB::POW_F128, "powf128");
setLibcallName(RTLIB::FMIN_F128, "fminf128");
setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
setLibcallName(RTLIB::REM_F128, "fmodf128");
setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
setLibcallName(RTLIB::CEIL_F128, "ceilf128");
setLibcallName(RTLIB::FLOOR_F128, "floorf128");
setLibcallName(RTLIB::TRUNC_F128, "truncf128");
setLibcallName(RTLIB::ROUND_F128, "roundf128");
setLibcallName(RTLIB::LROUND_F128, "lroundf128");
setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
setLibcallName(RTLIB::RINT_F128, "rintf128");
setLibcallName(RTLIB::LRINT_F128, "lrintf128");
setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
setLibcallName(RTLIB::FMA_F128, "fmaf128");
setLibcallName(RTLIB::FREXP_F128, "frexpf128");
if (Subtarget.isAIXABI()) {
setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
}
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
if (Subtarget.useCRBits()) {
setHasMultipleConditionRegisters();
setJumpIsExpensive();
}
// TODO: The default entry number is set to 64. This stops most jump table
// generation on PPC. But it is good for current PPC hardware because the
// indirect branch via mtctr to the jump table may lead to poor branch
// prediction. Re-evaluate this value on future hardware that can do better
// with mtctr.
setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
setMinFunctionAlignment(Align(4));
switch (Subtarget.getCPUDirective()) {
default: break;
case PPC::DIR_970:
case PPC::DIR_A2:
case PPC::DIR_E500:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
case PPC::DIR_PWR4:
case PPC::DIR_PWR5:
case PPC::DIR_PWR5X:
case PPC::DIR_PWR6:
case PPC::DIR_PWR6X:
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
case PPC::DIR_PWR10:
case PPC::DIR_PWR11:
case PPC::DIR_PWR_FUTURE:
setPrefLoopAlignment(Align(16));
setPrefFunctionAlignment(Align(16));
break;
}
if (Subtarget.enableMachineScheduler())
setSchedulingPreference(Sched::Source);
else
setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties(STI.getRegisterInfo());
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
Subtarget.getCPUDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
} else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
// The A2 also benefits from (very) aggressive inlining of memcpy and
// friends. The overhead of the function call, even when warm, can be
// over one hundred cycles.
MaxStoresPerMemset = 128;
MaxStoresPerMemcpy = 128;
MaxStoresPerMemmove = 128;
MaxLoadsPerMemcmp = 128;
} else {
MaxLoadsPerMemcmp = 8;
MaxLoadsPerMemcmpOptSize = 4;
}
IsStrictFPEnabled = true;
// Let the subtarget (CPU) decide if a predictable select is more expensive
// than the corresponding branch. This information is used in CGP to decide
// when to convert selects into branches.
PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth;
}
// *********************************** NOTE ************************************
// For selecting load and store instructions, the addressing modes are defined
// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
// patterns to match the load and store instructions.
//
// The TD definitions for the addressing modes correspond to their respective
// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
// address mode flags of a particular node. Afterwards, the computed address
// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
// accordingly, based on the preferred addressing mode.
//
// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
// MemOpFlags contains all the possible flags that can be used to compute the
// optimal addressing mode for load and store instructions.
// AddrMode contains all the possible load and store addressing modes available
// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
//
// When adding new load and store instructions, it is possible that new address
// flags may need to be added into MemOpFlags, and a new addressing mode will
// need to be added to AddrMode. An entry of the new addressing mode (consisting
// of the minimal and main distinguishing address flags for the new load/store
// instructions) will need to be added into initializeAddrModeMap() below.
// Finally, when adding new addressing modes, getAddrModeForFlags() will
// need to be updated to account for selecting the optimal addressing mode.
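//
// As an illustrative sketch (not actual selection output): a doubleword
// load such as `ld r3, 8(r4)` has a register-plus-immediate address whose
// 16-bit displacement is a multiple of 4, so computeMOFlags() would compute
// roughly MOF_RPlusSImm16Mult4 | MOF_DoubleWordInt, matching the AM_DSForm
// entry registered in initializeAddrModeMap() below.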
// *****************************************************************************
/// Initialize the map that relates the different addressing modes of the load
/// and store instructions to a set of flags. This ensures the load/store
/// instruction is correctly matched during instruction selection.
void PPCTargetLowering::initializeAddrModeMap() {
AddrModesMap[PPC::AM_DForm] = {
// LWZ, STW
PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
// LBZ, LHZ, STB, STH
PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
// LHA
PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
// LFS, LFD, STFS, STFD
PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
};
AddrModesMap[PPC::AM_DSForm] = {
// LWA
PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
// LD, STD
PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
// DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
};
AddrModesMap[PPC::AM_DQForm] = {
// LXV, STXV
PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
};
AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
PPC::MOF_SubtargetP10};
// TODO: Add mapping for quadword load/store.
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
if (MaxAlign == MaxMaxAlign)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (MaxMaxAlign >= 32 &&
VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
MaxAlign = Align(32);
else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
MaxAlign < 16)
MaxAlign = Align(16);
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Align EltAlign;
getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (auto *EltTy : STy->elements()) {
Align EltAlign;
getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == MaxMaxAlign)
break;
}
}
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
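/// For example, with Altivec enabled a struct containing a <4 x i32> member
/// is aligned to 16 bytes; anything without a wide vector member falls back
/// to 8 bytes on PPC64 and 4 bytes on PPC32.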
uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
// 16-byte and wider vectors are passed on a 16-byte boundary.
// Everything else is aligned to an 8-byte boundary on PPC64 and a 4-byte
// boundary on PPC32.
Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
if (Subtarget.hasAltivec())
getMaxByValAlign(Ty, Alignment, Align(16));
return Alignment.value();
}
bool PPCTargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
bool PPCTargetLowering::hasSPE() const {
return Subtarget.hasSPE();
}
bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
return false;
if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
if (VTy->getScalarType()->isIntegerTy()) {
// 8/16-bit splat values fit in an immediate field, so the transform is not
// needed for those element sizes.
if (ElemSizeInBits == 32) {
Index = Subtarget.isLittleEndian() ? 2 : 1;
return true;
}
if (ElemSizeInBits == 64) {
Index = Subtarget.isLittleEndian() ? 1 : 0;
return true;
}
}
}
return false;
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
case PPCISD::XSMINC: return "PPCISD::XSMINC";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::FTSQRT:
return "PPCISD::FTSQRT";
case PPCISD::FSQRT:
return "PPCISD::FSQRT";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::XXSPLTI_SP_TO_DP:
return "PPCISD::XXSPLTI_SP_TO_DP";
case PPCISD::XXSPLTI32DX:
return "PPCISD::XXSPLTI32DX";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::XXPERM:
return "PPCISD::XXPERM";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
case PPCISD::CALL_RM:
return "PPCISD::CALL_RM";
case PPCISD::CALL_NOP_RM:
return "PPCISD::CALL_NOP_RM";
case PPCISD::CALL_NOTOC_RM:
return "PPCISD::CALL_NOTOC_RM";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
case PPCISD::BCTRL_RM:
return "PPCISD::BCTRL_RM";
case PPCISD::BCTRL_LOAD_TOC_RM:
return "PPCISD::BCTRL_LOAD_TOC_RM";
case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
case PPCISD::MFVSR: return "PPCISD::MFVSR";
case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
case PPCISD::ANDI_rec_1_EQ_BIT:
return "PPCISD::ANDI_rec_1_EQ_BIT";
case PPCISD::ANDI_rec_1_GT_BIT:
return "PPCISD::ANDI_rec_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
case PPCISD::STXSIX: return "PPCISD::STXSIX";
case PPCISD::VEXTS: return "PPCISD::VEXTS";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
case PPCISD::ST_VSR_SCAL_INT:
return "PPCISD::ST_VSR_SCAL_INT";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
case PPCISD::MFFS: return "PPCISD::MFFS";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
case PPCISD::CR6SET: return "PPCISD::CR6SET";
case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::PADDI_DTPREL:
return "PPCISD::PADDI_DTPREL";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
case PPCISD::SC: return "PPCISD::SC";
case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
case PPCISD::RFEBB: return "PPCISD::RFEBB";
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
case PPCISD::STRICT_FADDRTZ:
return "PPCISD::STRICT_FADDRTZ";
case PPCISD::STRICT_FCTIDZ:
return "PPCISD::STRICT_FCTIDZ";
case PPCISD::STRICT_FCTIWZ:
return "PPCISD::STRICT_FCTIWZ";
case PPCISD::STRICT_FCTIDUZ:
return "PPCISD::STRICT_FCTIDUZ";
case PPCISD::STRICT_FCTIWUZ:
return "PPCISD::STRICT_FCTIWUZ";
case PPCISD::STRICT_FCFID:
return "PPCISD::STRICT_FCFID";
case PPCISD::STRICT_FCFIDU:
return "PPCISD::STRICT_FCFIDU";
case PPCISD::STRICT_FCFIDS:
return "PPCISD::STRICT_FCFIDS";
case PPCISD::STRICT_FCFIDUS:
return "PPCISD::STRICT_FCFIDUS";
case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
case PPCISD::STORE_COND:
return "PPCISD::STORE_COND";
}
return nullptr;
}
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
EVT VT) const {
if (!VT.isVector())
return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
return true;
}
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
return CFP->getValueAPF().isZero();
else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
// Maybe this has already been legalized into the constant pool?
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
return CFP->getValueAPF().isZero();
}
return false;
}
/// isConstantOrUndef - Op is a shuffle mask element, which is either undef
/// (negative) or a constant index. Return true if Op is undef or if it
/// matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
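/// For example, with ShuffleKind 0 on a big-endian target the required mask
/// is <1, 3, 5, ..., 31>: the odd bytes of the two concatenated inputs,
/// i.e. the low byte of each halfword.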
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
} else if (ShuffleKind == 2) {
if (!IsLE)
return false;
for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2))
return false;
} else if (ShuffleKind == 1) {
unsigned j = IsLE ? 0 : 1;
for (unsigned i = 0; i != 8; ++i)
if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
return false;
}
return true;
}
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
} else if (ShuffleKind == 2) {
if (!IsLE)
return false;
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
return false;
} else if (ShuffleKind == 1) {
unsigned j = IsLE ? 0 : 2;
for (unsigned i = 0; i != 8; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
return false;
}
return true;
}
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
if (!Subtarget.hasP8Vector())
return false;
bool IsLE = DAG.getDataLayout().isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
for (unsigned i = 0; i != 16; i += 4)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
!isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
!isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
return false;
} else if (ShuffleKind == 2) {
if (!IsLE)
return false;
for (unsigned i = 0; i != 16; i += 4)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
!isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
return false;
} else if (ShuffleKind == 1) {
unsigned j = IsLE ? 0 : 4;
for (unsigned i = 0; i != 8; i += 4)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
!isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
!isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
!isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
!isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
!isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
return false;
}
return true;
}
/// isVMerge - Common function used to match vmrg* shuffles.
///
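/// For example, isVMerge(N, 4, 8, 24) matches the big-endian VMRGLW
/// pattern <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>.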
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) {
if (N->getValueType(0) != MVT::v16i8)
return false;
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!");
for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
LHSStart+j+i*UnitSize) ||
!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
RHSStart+j+i*UnitSize))
return false;
}
return true;
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
if (DAG.getDataLayout().isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 2) // swapped
return isVMerge(N, UnitSize, 0, 16);
else
return false;
} else {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 0) // normal
return isVMerge(N, UnitSize, 8, 24);
else
return false;
}
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
if (DAG.getDataLayout().isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 2) // swapped
return isVMerge(N, UnitSize, 8, 24);
else
return false;
} else {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 0) // normal
return isVMerge(N, UnitSize, 0, 16);
else
return false;
}
}
/**
* Common function used to match vmrgew and vmrgow shuffles
*
* The indexOffset determines whether to look for even or odd words in
* the shuffle mask. This is based on the endianness of the target
* machine.
* - Little Endian:
* - Use offset of 0 to check for odd elements
* - Use offset of 4 to check for even elements
* - Big Endian:
* - Use offset of 0 to check for even elements
* - Use offset of 4 to check for odd elements
* A detailed description of the vector element ordering for little endian
* and big endian can be found in the IBM developerWorks article "Targeting
* your applications - what little endian and big endian IBM XL C/C++
* compiler differences mean to you":
* http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
*
* The mask to the shuffle vector instruction specifies the indices of the
* elements from the two input vectors to place in the result. The elements are
* numbered in array-access order, starting with the first vector. These vectors
* are always of type v16i8, so each vector contains 16 elements of 8 bits
* each. More information on the shufflevector instruction can be found in
* the Language Reference:
* http://llvm.org/docs/LangRef.html#shufflevector-instruction
*
* The RHSStartValue indicates whether the same input vectors are used (unary)
* or two different input vectors are used, based on the following:
* - If the instruction uses the same vector for both inputs, the range of the
* indices will be 0 to 15. In this case, the RHSStart value passed should
* be 0.
* - If the instruction has two different vectors then the range of the
* indices will be 0 to 31. In this case, the RHSStart value passed should
* be 16 (indices 0-15 specify elements in the first vector while indices 16
* to 31 specify elements in the second vector).
*
* \param[in] N The shuffle vector SD Node to analyze
* \param[in] IndexOffset Specifies whether to look for even or odd elements
* \param[in] RHSStartValue Specifies the starting index for the righthand input
* vector to the shuffle_vector instruction
* \return true iff this shuffle vector represents an even or odd word merge
*/
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
unsigned RHSStartValue) {
if (N->getValueType(0) != MVT::v16i8)
return false;
for (unsigned i = 0; i < 2; ++i)
for (unsigned j = 0; j < 4; ++j)
if (!isConstantOrUndef(N->getMaskElt(i*4+j),
i*RHSStartValue+j+IndexOffset) ||
!isConstantOrUndef(N->getMaskElt(i*4+j+8),
i*RHSStartValue+j+IndexOffset+8))
return false;
return true;
}
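// As a concrete instance: isVMerge(N, 0, 16) accepts exactly the mask
//   <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>,
// i.e. the even words of both inputs (the big-endian vmrgew pattern with
// two different input vectors).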
/**
* Determine if the specified shuffle mask is suitable for the vmrgew or
* vmrgow instructions.
*
* \param[in] N The shuffle vector SD Node to analyze
* \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
* \param[in] ShuffleKind Identify the type of merge:
* - 0 = big-endian merge with two different inputs;
* - 1 = either-endian merge with two identical inputs;
* - 2 = little-endian merge with two different inputs (inputs are swapped for
* little-endian merges).
* \param[in] DAG The current SelectionDAG
* \return true iff this shuffle mask is suitable for a vmrgew or vmrgow
* instruction
*/
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG) {
if (DAG.getDataLayout().isLittleEndian()) {
unsigned indexOffset = CheckEven ? 4 : 0;
if (ShuffleKind == 1) // Unary
return isVMerge(N, indexOffset, 0);
else if (ShuffleKind == 2) // swapped
return isVMerge(N, indexOffset, 16);
else
return false;
}
else {
unsigned indexOffset = CheckEven ? 0 : 4;
if (ShuffleKind == 1) // Unary
return isVMerge(N, indexOffset, 0);
else if (ShuffleKind == 0) // Normal
return isVMerge(N, indexOffset, 16);
else
return false;
}
return false;
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
if (N->getValueType(0) != MVT::v16i8)
return -1;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
// Find the first non-undef value in the shuffle mask.
unsigned i;
for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
/*search*/;
if (i == 16) return -1; // all undef.
// Otherwise, check to see if the rest of the elements are consecutively
// numbered from this value.
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
bool isLE = DAG.getDataLayout().isLittleEndian();
if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
return -1;
} else if (ShuffleKind == 1) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
return -1;
} else
return -1;
if (isLE)
ShiftAmt = 16 - ShiftAmt;
return ShiftAmt;
}
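// For instance, the mask <3,4,5,...,18> is matched with ShiftAmt 3 for a
// big-endian ShuffleKind 0 query, while the same mask under little-endian
// ShuffleKind 2 yields 16 - 3 = 13 after the final adjustment above.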
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
EVT VT = N->getValueType(0);
if (VT == MVT::v2i64 || VT == MVT::v2f64)
return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
// The consecutive indices need to specify an element, not part of two
// different elements. So abandon ship early if this isn't the case.
if (N->getMaskElt(0) % EltSize != 0)
return false;
// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.
unsigned ElementBase = N->getMaskElt(0);
// FIXME: Handle UNDEF elements too!
if (ElementBase >= 16)
return false;
// Check that the indices are consecutive, in the case of a multi-byte element
// splatted with a v16i8 mask.
for (unsigned i = 1; i != EltSize; ++i)
if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
return false;
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
if (N->getMaskElt(i) < 0) continue;
for (unsigned j = 0; j != EltSize; ++j)
if (N->getMaskElt(i+j) != N->getMaskElt(j))
return false;
}
return true;
}
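// Example: with EltSize == 4, the mask
//   <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7>
// passes all of the checks above and describes a splat of word 1 of the
// first input vector.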
/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the step between consecutive indices within each
/// element: 1 if the indices are increasing, -1 if they are decreasing.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
int StepLen) {
assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
"Unexpected element width.");
assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
unsigned NumOfElem = 16 / Width;
unsigned MaskVal[16]; // Width is never greater than 16
for (unsigned i = 0; i < NumOfElem; ++i) {
MaskVal[0] = N->getMaskElt(i * Width);
if ((StepLen == 1) && (MaskVal[0] % Width)) {
return false;
} else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
return false;
}
for (unsigned int j = 1; j < Width; ++j) {
MaskVal[j] = N->getMaskElt(i * Width + j);
if (MaskVal[j] != MaskVal[j-1] + StepLen) {
return false;
}
}
}
return true;
}
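// Example: isNByteElemShuffleMask(N, 4, -1) accepts
//   <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>,
// i.e. bytes reversed within each word; this is the form consumed by the
// isXXBR* byte-reversal checks below.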
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
if (!isNByteElemShuffleMask(N, 4, 1))
return false;
// Now we look at mask elements 0,4,8,12
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
unsigned M2 = N->getMaskElt(8) / 4;
unsigned M3 = N->getMaskElt(12) / 4;
unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
// Below, let H and L be arbitrary elements of the shuffle mask
// where H is in the range [4,7] and L is in the range [0,3].
// H, 1, 2, 3 or L, 5, 6, 7
if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
(M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
InsertAtByte = IsLE ? 12 : 0;
Swap = M0 < 4;
return true;
}
// 0, H, 2, 3 or 4, L, 6, 7
if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
(M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
InsertAtByte = IsLE ? 8 : 4;
Swap = M1 < 4;
return true;
}
// 0, 1, H, 3 or 4, 5, L, 7
if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
(M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
InsertAtByte = IsLE ? 4 : 8;
Swap = M2 < 4;
return true;
}
// 0, 1, 2, H or 4, 5, 6, L
if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
(M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
InsertAtByte = IsLE ? 0 : 12;
Swap = M3 < 4;
return true;
}
// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
ShiftElts = 0;
Swap = true;
unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
InsertAtByte = IsLE ? 12 : 0;
return true;
}
if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
InsertAtByte = IsLE ? 8 : 4;
return true;
}
if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
InsertAtByte = IsLE ? 4 : 8;
return true;
}
if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
InsertAtByte = IsLE ? 0 : 12;
return true;
}
}
return false;
}
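// Walking one case through the logic above: on little-endian, the mask
//   <16,17,18,19, 4,5,6,7, 8,9,10,11, 12,13,14,15>
// gives (M0,M1,M2,M3) = (4,1,2,3), matching the "H, 1, 2, 3" form, so
// ShiftElts = LittleEndianShifts[0] = 2, InsertAtByte = 12, Swap = false.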
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the word is consecutive.
if (!isNByteElemShuffleMask(N, 4, 1))
return false;
// Now we look at mask elements 0,4,8,12, which are the beginning of words.
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
unsigned M2 = N->getMaskElt(8) / 4;
unsigned M3 = N->getMaskElt(12) / 4;
// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
assert(M0 < 4 && "Indexing into an undef vector?");
if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
return false;
ShiftElts = IsLE ? (4 - M0) % 4 : M0;
Swap = false;
return true;
}
// Ensure each word index of the ShuffleVector Mask is consecutive.
if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
return false;
if (IsLE) {
if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
// Input vectors don't need to be swapped if the leading element
// of the result is one of the 3 left elements of the second vector
// (or if there is no shift to be done at all).
Swap = false;
ShiftElts = (8 - M0) % 8;
} else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
// Input vectors need to be swapped if the leading element
// of the result is one of the 3 left elements of the first vector
// (or if we're shifting by 4 - thereby simply swapping the vectors).
Swap = true;
ShiftElts = (4 - M0) % 4;
}
return true;
} else { // BE
if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
// Input vectors don't need to be swapped if the leading element
// of the result is one of the 4 elements of the first vector.
Swap = false;
ShiftElts = M0;
} else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
// Input vectors need to be swapped if the leading element
// of the result is one of the 4 elements of the right vector.
Swap = true;
ShiftElts = M0 - 4;
}
return true;
}
}
bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
if (!isNByteElemShuffleMask(N, Width, -1))
return false;
for (int i = 0; i < 16; i += Width)
if (N->getMaskElt(i) != i + Width - 1)
return false;
return true;
}
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 2);
}
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 4);
}
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 8);
}
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
return isXXBRShuffleMaskHelper(N, 16);
}
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the double word is consecutive.
if (!isNByteElemShuffleMask(N, 8, 1))
return false;
unsigned M0 = N->getMaskElt(0) / 8;
unsigned M1 = N->getMaskElt(8) / 8;
assert(((M0 | M1) < 4) && "A mask element out of bounds?");
// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
if ((M0 | M1) < 2) {
DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
Swap = false;
return true;
} else
return false;
}
if (IsLE) {
if (M0 > 1 && M1 < 2) {
Swap = false;
} else if (M0 < 2 && M1 > 1) {
M0 = (M0 + 2) % 4;
M1 = (M1 + 2) % 4;
Swap = true;
} else
return false;
// Note: if control flow comes here that means Swap is already set above
DM = (((~M1) & 1) << 1) + ((~M0) & 1);
return true;
} else { // BE
if (M0 < 2 && M1 > 1) {
Swap = false;
} else if (M0 > 1 && M1 < 2) {
M0 = (M0 + 2) % 4;
M1 = (M1 + 2) % 4;
Swap = true;
} else
return false;
// Note: if control flow comes here that means Swap is already set above
DM = (M0 << 1) + (M1 & 1);
return true;
}
}
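// Sample evaluation: on little-endian, the mask <16..23, 0..7> gives
// (M0,M1) = (2,0); since M0 > 1 and M1 < 2, Swap = false and
// DM = (((~0) & 1) << 1) + ((~2) & 1) = 3.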
/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// appropriate for PPC mnemonics (which have a big endian bias - namely
/// elements are counted from the left of the vector register).
unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
EVT VT = SVOp->getValueType(0);
if (VT == MVT::v2i64 || VT == MVT::v2f64)
return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
: SVOp->getMaskElt(0);
if (DAG.getDataLayout().isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
return SVOp->getMaskElt(0) / EltSize;
}
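// For example, a v16i8 splat whose first mask element is 4 with EltSize 4
// (a splat of word 1) returns 1 on big-endian targets and
// (16 / 4) - 1 - 1 = 2 on little-endian targets.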
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element ([1,2,4] -> [b,h,w]).
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
SDValue OpVal;
// If ByteSize of the splat is bigger than the element size of the
// build_vector, then we have a case where we are checking for a splat where
// multiple elements of the buildvector are folded together into a single
// logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
unsigned EltSize = 16/N->getNumOperands();
if (EltSize < ByteSize) {
unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
SDValue UniquedVals[4];
assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
// See if all of the elements in the buildvector agree across.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (N->getOperand(i).isUndef()) continue;
// If the element isn't a constant, bail fully out.
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
if (!UniquedVals[i&(Multiple-1)].getNode())
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
return SDValue(); // no match.
}
// Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
// either constant or undef values that are identical for each chunk. See
// if these chunks can form into a larger vspltis*.
// Check to see if all of the leading entries are either 0 or -1. If
// neither, then this won't fit into the immediate field.
bool LeadingZero = true;
bool LeadingOnes = true;
for (unsigned i = 0; i != Multiple-1; ++i) {
if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
LeadingZero &= isNullConstant(UniquedVals[i]);
LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
}
// Finally, check the least significant entry.
if (LeadingZero) {
if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
if (Val < 16) // 0,0,0,4 -> vspltisw(4)
return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
}
if (LeadingOnes) {
if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
}
return SDValue();
}
// Check to see if this buildvec has a single non-undef value in its elements.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (N->getOperand(i).isUndef()) continue;
if (!OpVal.getNode())
OpVal = N->getOperand(i);
else if (OpVal != N->getOperand(i))
return SDValue();
}
if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
unsigned ValSizeInBytes = EltSize;
uint64_t Value = 0;
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
Value = CN->getZExtValue();
} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
}
// If the splat value is larger than the element value, then we can never do
// this splat. The only case that we could fit the replicated bits into our
// immediate field for would be zero, and we prefer to use vxor for it.
if (ValSizeInBytes < ByteSize) return SDValue();
// If the element value is larger than the splat value, check if it consists
// of a repeated bit pattern of size ByteSize.
if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
return SDValue();
// Properly sign extend the value.
int MaskVal = SignExtend32(Value, ByteSize * 8);
// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
if (MaskVal == 0) return SDValue();
// Finally, if this value fits in a 5 bit sext field, return it
if (SignExtend32<5>(MaskVal) == MaskVal)
return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
return SDValue();
}
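// Example: a v4i32 build_vector of four 5s queried with ByteSize == 4
// returns the target constant 5 (i.e. vspltisw 5); the same vector of 16s
// fails, since 16 does not fit the 5-bit signed immediate field.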
//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and stores
/// the immediate in \p Imm.
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
if (!isa<ConstantSDNode>(N))
return false;
Imm = (int16_t)N->getAsZExtVal();
if (N->getValueType(0) == MVT::i32)
return Imm == (int32_t)N->getAsZExtVal();
else
return Imm == (int64_t)N->getAsZExtVal();
}
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
/// Used when computing address flags for selecting loads and stores.
/// If we have an OR, check if the LHS and RHS are provably disjoint.
/// An OR of two provably disjoint values is equivalent to an ADD.
/// Most PPC load/store instructions compute the effective address as a sum,
/// so doing this conversion is useful.
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
if (N.getOpcode() != ISD::OR)
return false;
KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if (!LHSKnown.Zero.getBoolValue())
return false;
KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
}
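// For example, (or (and X, -16), 3) is provably disjoint: the low four bits
// of the LHS are known zero, the RHS has all bits above bit 1 known zero,
// and together every bit is known zero on at least one side, so the OR can
// be treated as an ADD.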
/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
/// be represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
SDValue &Index,
SelectionDAG &DAG) const {
for (SDNode *U : N->uses()) {
if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
if (Memop->getMemoryVT() == MVT::f64) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
}
}
}
return false;
}
/// isIntS34Immediate - This method tests whether the value of the given node
/// can be accurately represented as a sign extension from a 34-bit value. If
/// so, this returns true and stores the immediate in \p Imm.
bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
if (!isa<ConstantSDNode>(N))
return false;
Imm = (int64_t)N->getAsZExtVal();
return isInt<34>(Imm);
}
bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
return isIntS34Immediate(Op.getNode(), Imm);
}
/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
/// non-zero and N can be represented by a base register plus a signed 16-bit
/// displacement, make a more precise judgement by checking (displacement % \p
/// EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(
SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
MaybeAlign EncodingAlignment) const {
// If we have a PC Relative target flag don't select as [reg+reg]. It will be
// a [pc+imm].
if (SelectAddressPCRel(N, Base))
return false;
int16_t Imm = 0;
if (N.getOpcode() == ISD::ADD) {
// SPE f64 loads/stores cannot handle a 16-bit offset, because SPE
// load/store instructions only support 8-bit offsets.
if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
return true;
if (isIntS16Immediate(N.getOperand(1), Imm) &&
(!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
return false; // r+i
if (N.getOperand(1).getOpcode() == PPCISD::Lo)
return false; // r+i
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
} else if (N.getOpcode() == ISD::OR) {
if (isIntS16Immediate(N.getOperand(1), Imm) &&
(!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
return false; // r+i: fold the immediate if we can.
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
// disjoint.
KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if (LHSKnown.Zero.getBoolValue()) {
KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
}
}
}
return false;
}
// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
// FIXME: This does not handle the LWA case.
if (VT != MVT::i64)
return;
// NOTE: We'll exclude negative FIs here, which come from argument
// lowering, because there are no known test cases triggering this problem
// using packed structures (or similar). We can remove this exclusion if
// we find such a test case. The reason why this is so test-case driven is
// because this entire 'fixup' is only to prevent crashes (from the
// register scavenger) on not-really-valid inputs. For example, if we have:
// %a = alloca i1
// %b = bitcast i1* %a to i64*
// store i64 0, i64* %b
// then the store should really be marked as 'align 1', but is not. If it
// were marked as 'align 1' then the indexed form would have been
// instruction-selected initially, and the problem this 'fixup' is preventing
// won't happen regardless.
if (FrameIdx < 0)
return;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.getObjectAlign(FrameIdx) >= Align(4))
return;
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setHasNonRISpills();
}
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(
SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
MaybeAlign EncodingAlignment) const {
// FIXME dl should come from parent load or store, not from address
SDLoc dl(N);
// If we have a PC Relative target flag don't select as [reg+imm]. It will be
// a [pc+imm].
if (SelectAddressPCRel(N, Base))
return false;
// If this can be more profitably realized as r+r, fail.
if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
return false;
if (N.getOpcode() == ISD::ADD) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
return true; // [r+i]
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))).
assert(!N.getOperand(1).getConstantOperandVal(1) &&
"Cannot handle constant offsets yet!");
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
Disp.getOpcode() == ISD::TargetConstantPool ||
Disp.getOpcode() == ISD::TargetJumpTable);
Base = N.getOperand(0);
return true; // [&g+r]
}
} else if (N.getOpcode() == ISD::OR) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
return true;
}
}
} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
// Loading from a constant address.
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
int16_t Imm;
if (isIntS16Immediate(CN, Imm) &&
(!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
// Handle 32-bit sext immediates with LIS + addr mode.
if ((CN->getValueType(0) == MVT::i32 ||
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
(!EncodingAlignment ||
isAligned(*EncodingAlignment, CN->getZExtValue()))) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
MVT::i32);
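// e.g. Addr = 0x12348000: Disp = (short)0x8000 = -32768 and the LIS
// operand is 0x1235, since 0x12350000 + (-32768) == 0x12348000.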
unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
return true;
}
}
Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else
Base = N;
return true; // [r+0]
}
/// Similar to the 16-bit case but for instructions that take a 34-bit
/// displacement field (prefixed loads/stores).
bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG) const {
// Only on 64-bit targets.
if (N.getValueType() != MVT::i64)
return false;
SDLoc dl(N);
int64_t Imm = 0;
if (N.getOpcode() == ISD::ADD) {
if (!isIntS34Immediate(N.getOperand(1), Imm))
return false;
Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
else
Base = N.getOperand(0);
return true;
}
if (N.getOpcode() == ISD::OR) {
if (!isIntS34Immediate(N.getOperand(1), Imm))
return false;
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
return false;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
else
Base = N.getOperand(0);
Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
return true;
}
if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
return true;
}
return false;
}
/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
SDValue &Index,
SelectionDAG &DAG) const {
// Check to see if we can easily represent this as an [r+r] address. This
// will fail if it thinks that the address is more profitably represented as
// reg+imm, e.g. where imm = 0.
if (SelectAddressRegReg(N, Base, Index, DAG))
return true;
// If the address is the result of an add, we will utilize the fact that the
// address calculation includes an implicit add. However, we can reduce
// register pressure if we do not materialize a constant just for use as the
// index register. We therefore only fold the add away when it is not an add
// of a value and a 16-bit signed constant where both operands have a single
// use.
int16_t imm = 0;
if (N.getOpcode() == ISD::ADD &&
(!isIntS16Immediate(N.getOperand(1), imm) ||
!N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
Base = N.getOperand(0);
Index = N.getOperand(1);
return true;
}
// Otherwise, do it the hard way, using R0 as the base register.
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
}
template <typename Ty> static bool isValidPCRelNode(SDValue N) {
Ty *PCRelCand = dyn_cast<Ty>(N);
return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
}
/// Returns true if this address is a PC Relative address.
/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
// This is a materialize PC Relative node. Always select this as PC Relative.
Base = N;
if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
return true;
if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
isValidPCRelNode<GlobalAddressSDNode>(N) ||
isValidPCRelNode<JumpTableSDNode>(N) ||
isValidPCRelNode<BlockAddressSDNode>(N))
return true;
return false;
}
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
// If there are any other uses other than scalar to vector, then we should
// keep it as a scalar load -> direct move pattern to prevent multiple
// loads.
LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
if (!LD)
return false;
EVT MemVT = LD->getMemoryVT();
if (!MemVT.isSimple())
return false;
switch (MemVT.getSimpleVT().SimpleTy) {
case MVT::i64:
break;
case MVT::i32:
if (!ST.hasP8Vector())
return false;
break;
case MVT::i16:
case MVT::i8:
if (!ST.hasP9Vector())
return false;
break;
default:
return false;
}
SDValue LoadedVal(N, 0);
if (!LoadedVal.hasOneUse())
return false;
for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
UI != UE; ++UI)
if (UI.getUse().get().getResNo() == 0 &&
UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
return false;
return true;
}
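// For example, an i64 load whose only user is a SCALAR_TO_VECTOR node can
// be selected as a single direct vector load (such as lxsd or lfd), rather
// than an integer load followed by a direct move to a vector register.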
/// getPreIndexedAddressParts - Returns true, and sets the base pointer,
/// offset pointer, and addressing mode by reference, if the node's address
/// can be legally represented as a pre-indexed load/store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
if (DisablePPCPreinc) return false;
bool isLoad = true;
SDValue Ptr;
EVT VT;
Align Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
Alignment = LD->getAlign();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
Alignment = ST->getAlign();
isLoad = false;
} else
return false;
// Do not generate pre-inc forms for specific loads that feed scalar_to_vector
// instructions because we can fold these into a more efficient instruction
// instead, (such as LXSD).
if (isLoad && usePartialVectorLoads(N, Subtarget)) {
return false;
}
// PowerPC doesn't have preinc load/store instructions for vectors
if (VT.isVector())
return false;
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
// Common code will reject creating a pre-inc form if the base pointer
// is a frame index, or if N is a store and the base pointer is either
// the same as or a predecessor of the value being stored. Check for
// those situations here, and try with swapped Base/Offset instead.
bool Swap = false;
if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
Swap = true;
else if (!isLoad) {
SDValue Val = cast<StoreSDNode>(N)->getValue();
if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
Swap = true;
}
if (Swap)
std::swap(Base, Offset);
AM = ISD::PRE_INC;
return true;
}
// LDU/STU can only handle immediates that are a multiple of 4.
if (VT != MVT::i64) {
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
return false;
} else {
// LDU/STU need an address with at least 4-byte alignment.
if (Alignment < Align(4))
return false;
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
return false;
}
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
// PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
// sext i32 to i64 when addr mode is r+i.
if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
LD->getExtensionType() == ISD::SEXTLOAD &&
isa<ConstantSDNode>(Offset))
return false;
}
AM = ISD::PRE_INC;
return true;
}
//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Set the HiOpFlags and LoOpFlags to the target MO flags; when generating
/// position-independent code, labels are referenced through the PIC base.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
unsigned &HiOpFlags, unsigned &LoOpFlags,
const GlobalValue *GV = nullptr) {
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
// Don't use the pic base if not in PIC relocation model.
if (IsPIC) {
HiOpFlags = PPCII::MO_PIC_HA_FLAG;
LoOpFlags = PPCII::MO_PIC_LO_FLAG;
}
}
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
SelectionDAG &DAG) {
SDLoc DL(HiPart);
EVT PtrVT = HiPart.getValueType();
SDValue Zero = DAG.getConstant(0, DL, PtrVT);
SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
// With PIC, the first instruction is actually "GR+hi(&G)".
if (isPIC)
Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
// Generate non-pic code that has direct accesses to the constant pool.
// The address of the global is just (hi(&g)+lo(&g)).
return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
static void setUsesTOCBasePtr(MachineFunction &MF) {
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setUsesTOCBasePtr();
}
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
setUsesTOCBasePtr(DAG.getMachineFunction());
}
SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
SDValue GA) const {
const bool Is64Bit = Subtarget.isPPC64();
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
: Subtarget.isAIXABI()
? DAG.getRegister(PPC::R2, VT)
: DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
SDValue Ops[] = { GA, Reg };
return DAG.getMemIntrinsicNode(
PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
MachineMemOperand::MOLoad);
}
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
const Constant *C = CP->getConstVal();
// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
if (Subtarget.isUsingPCRelativeCalls()) {
SDLoc DL(CP);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue ConstPool = DAG.getTargetConstantPool(
C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
}
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
return getTOCEntry(DAG, SDLoc(CP), GA);
}
unsigned MOHiFlag, MOLoFlag;
bool IsPIC = isPositionIndependent();
getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
if (IsPIC && Subtarget.isSVR4ABI()) {
SDValue GA =
DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
return getTOCEntry(DAG, SDLoc(CP), GA);
}
SDValue CPIHi =
DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
SDValue CPILo =
DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
// For 64-bit PowerPC, prefer the more compact relative encodings.
// This trades 32 bits per jump table entry for one or two instructions
// at the jump site.
unsigned PPCTargetLowering::getJumpTableEncoding() const {
if (isJumpTableRelative())
return MachineJumpTableInfo::EK_LabelDifference32;
return TargetLowering::getJumpTableEncoding();
}
bool PPCTargetLowering::isJumpTableRelative() const {
if (UseAbsoluteJumpTables)
return false;
if (Subtarget.isPPC64() || Subtarget.isAIXABI())
return true;
return TargetLowering::isJumpTableRelative();
}
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
switch (getTargetMachine().getCodeModel()) {
case CodeModel::Small:
case CodeModel::Medium:
return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
default:
return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
getPointerTy(DAG.getDataLayout()));
}
}
const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,
MCContext &Ctx) const {
if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
switch (getTargetMachine().getCodeModel()) {
case CodeModel::Small:
case CodeModel::Medium:
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
default:
return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
}
}
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
// isUsingPCRelativeCalls() returns true when PCRelative is enabled
if (Subtarget.isUsingPCRelativeCalls()) {
SDLoc DL(JT);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue GA =
DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
return MatAddr;
}
// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return getTOCEntry(DAG, SDLoc(JT), GA);
}
unsigned MOHiFlag, MOLoFlag;
bool IsPIC = isPositionIndependent();
getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
if (IsPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
PPCII::MO_PIC_FLAG);
return getTOCEntry(DAG, SDLoc(GA), GA);
}
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
const BlockAddress *BA = BASDN->getBlockAddress();
// isUsingPCRelativeCalls() returns true when PCRelative is enabled
if (Subtarget.isUsingPCRelativeCalls()) {
SDLoc DL(BASDN);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
PPCII::MO_PCREL_FLAG);
SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
return MatAddr;
}
// 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual BlockAddress is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
return getTOCEntry(DAG, SDLoc(BASDN), GA);
}
// 32-bit position-independent ELF stores the BlockAddress in the .got.
if (Subtarget.is32BitELFABI() && isPositionIndependent())
return getTOCEntry(
DAG, SDLoc(BASDN),
DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
unsigned MOHiFlag, MOLoFlag;
bool IsPIC = isPositionIndependent();
getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget.isAIXABI())
return LowerGlobalTLSAddressAIX(Op, DAG);
return LowerGlobalTLSAddressLinux(Op, DAG);
}
/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
/// and then apply the update.
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
SelectionDAG &DAG,
const TargetMachine &TM) {
// Initialize TLS model opt setting lazily:
// (1) Use initial-exec for single TLS var references within current function.
// (2) Use local-dynamic for multiple TLS var references within current
// function.
PPCFunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
SmallPtrSet<const GlobalValue *, 8> TLSGV;
// Iterate over all instructions within current function, collect all TLS
// global variables (global variables taken as the first parameter to
// Intrinsic::threadlocal_address).
const Function &Func = DAG.getMachineFunction().getFunction();
for (Function::const_iterator BI = Func.begin(), BE = Func.end(); BI != BE;
++BI)
for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end();
II != IE; ++II)
if (II->getOpcode() == Instruction::Call)
if (const CallInst *CI = dyn_cast<const CallInst>(&*II))
if (Function *CF = CI->getCalledFunction())
if (CF->isDeclaration() &&
CF->getIntrinsicID() == Intrinsic::threadlocal_address)
if (const GlobalValue *GV =
dyn_cast<GlobalValue>(II->getOperand(0))) {
TLSModel::Model GVModel = TM.getTLSModel(GV);
if (GVModel == TLSModel::LocalDynamic)
TLSGV.insert(GV);
}
unsigned TLSGVCnt = TLSGV.size();
LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
FuncInfo->setAIXFuncUseTLSIEForLD();
FuncInfo->setAIXFuncTLSModelOptInitDone();
}
if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
LLVM_DEBUG(
dbgs() << DAG.getMachineFunction().getName()
<< " function is using the TLS-IE model for TLS-LD access.\n");
Model = TLSModel::InitialExec;
}
}
SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().useEmulatedTLS())
report_fatal_error("Emulated TLS is not yet supported on AIX");
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
bool Is64Bit = Subtarget.isPPC64();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
// Apply update to the TLS model.
if (Subtarget.hasAIXShLibTLSModelOpt())
updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
bool HasAIXSmallTLSGlobalAttr = false;
SDValue VariableOffsetTGA =
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
SDValue TLSReg;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->hasAttribute("aix-small-tls"))
HasAIXSmallTLSGlobalAttr = true;
if (Is64Bit) {
// For local-exec and initial-exec on AIX (64-bit), the sequence generated
// involves a load of the variable offset (from the TOC), followed by an
// add of the loaded variable offset to R13 (the thread pointer).
// This code sequence looks like:
// ld reg1,var[TC](2)
// add reg2, reg1, r13 // r13 contains the thread pointer
TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
// With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
// global variable attribute, produce a faster access sequence for
// local-exec TLS variables where the offset from the TLS base is encoded
// as an immediate operand.
//
// We only utilize the faster local-exec access sequence when the TLS
// variable has a size within the policy limit. We treat types that are
// not sized or are empty as being over the policy size limit.
if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
IsTLSLocalExecModel) {
Type *GVType = GV->getValueType();
if (GVType->isSized() && !GVType->isEmptyTy() &&
GV->getDataLayout().getTypeAllocSize(GVType) <=
AIXSmallTlsPolicySizeLimit)
return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
}
} else {
// For local-exec and initial-exec on AIX (32-bit), the sequence generated
// involves loading the variable offset from the TOC, generating a call to
// .__get_tpointer to get the thread pointer (which will be in R3), and
// adding the two together:
// lwz reg1,var[TC](2)
// bla .__get_tpointer
// add reg2, reg1, r3
TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
// We do not implement the 32-bit version of the faster access sequence
// for local-exec that is controlled by the -maix-small-local-exec-tls
// option, or the "aix-small-tls" global variable attribute.
if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
report_fatal_error("The small-local-exec TLS access sequence is "
"currently only supported on AIX (64-bit mode).");
}
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
}
if (Model == TLSModel::LocalDynamic) {
bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
// We do not implement the 32-bit version of the faster access sequence
// for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
report_fatal_error("The small-local-dynamic TLS access sequence is "
"currently only supported on AIX (64-bit mode).");
// For local-dynamic on AIX, we need to generate one TOC entry for each
// variable offset, and a single module-handle TOC entry for the entire
// file.
SDValue VariableOffsetTGA =
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
Module *M = DAG.getMachineFunction().getFunction().getParent();
GlobalVariable *TLSGV =
dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
assert(TLSGV && "Not able to create GV for _$TLSML.");
TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
SDValue ModuleHandleTGA =
DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
SDValue ModuleHandle =
DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
// With the -maix-small-local-dynamic-tls option, produce a faster access
// sequence for local-dynamic TLS variables where the offset from the
// module-handle is encoded as an immediate operand.
//
// We only utilize the faster local-dynamic access sequence when the TLS
// variable has a size within the policy limit. We treat types that are
// not sized or are empty as being over the policy size limit.
if (HasAIXSmallLocalDynamicTLS) {
Type *GVType = GV->getValueType();
if (GVType->isSized() && !GVType->isEmptyTy() &&
GV->getDataLayout().getTypeAllocSize(GVType) <=
AIXSmallTlsPolicySizeLimit)
return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
ModuleHandle);
}
return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
}
// If Local- or Initial-exec or Local-dynamic is not possible or specified,
// all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
// need to generate two TOC entries, one for the variable offset, one for the
// region handle. The global address for the TOC entry of the region handle is
// created with the MO_TLSGDM_FLAG flag and the global address for the TOC
// entry of the variable offset is created with MO_TLSGD_FLAG.
SDValue VariableOffsetTGA =
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
SDValue RegionHandleTGA =
DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
RegionHandle);
}
SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
SelectionDAG &DAG) const {
// FIXME: TLS addresses currently use medium model code sequences,
// which is the most useful form. Eventually support for small and
// large models could be added if users need it, at the cost of
// additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(GA, DAG);
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
bool is64bit = Subtarget.isPPC64();
const Module *M = DAG.getMachineFunction().getFunction().getParent();
PICLevel::Level picLevel = M->getPICLevel();
const TargetMachine &TM = getTargetMachine();
TLSModel::Model Model = TM.getTLSModel(GV);
if (Model == TLSModel::LocalExec) {
if (Subtarget.isUsingPCRelativeCalls()) {
SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL_PCREL_FLAG);
SDValue MatAddr =
DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
}
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL_LO);
SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
: DAG.getRegister(PPC::R2, MVT::i32);
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
}
if (Model == TLSModel::InitialExec) {
bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
SDValue TGA = DAG.getTargetGlobalAddress(
GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
SDValue TGATLS = DAG.getTargetGlobalAddress(
GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
SDValue TPOffset;
if (IsPCRel) {
SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
MachinePointerInfo());
} else {
SDValue GOTPtr;
if (is64bit) {
setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr =
DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
} else {
if (!TM.isPositionIndependent())
GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
else if (picLevel == PICLevel::SmallPIC)
GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
}
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
if (Model == TLSModel::GeneralDynamic) {
if (Subtarget.isUsingPCRelativeCalls()) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_GOT_TLSGD_PCREL_FLAG);
return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
}
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
GOTReg, TGA);
} else {
if (picLevel == PICLevel::SmallPIC)
GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
GOTPtr, TGA, TGA);
}
if (Model == TLSModel::LocalDynamic) {
if (Subtarget.isUsingPCRelativeCalls()) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_GOT_TLSLD_PCREL_FLAG);
SDValue MatPCRel =
DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
}
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
GOTReg, TGA);
} else {
if (picLevel == PICLevel::SmallPIC)
GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
PtrVT, GOTPtr, TGA, TGA);
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
PtrVT, TLSAddr, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
llvm_unreachable("Unknown TLS model!");
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
SDLoc DL(GSDN);
const GlobalValue *GV = GSDN->getGlobal();
// 64-bit SVR4 ABI & AIX ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
if (Subtarget.isUsingPCRelativeCalls()) {
EVT Ty = getPointerTy(DAG.getDataLayout());
if (isAccessedAsGotIndirect(Op)) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
PPCII::MO_GOT_PCREL_FLAG);
SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
MachinePointerInfo());
return Load;
} else {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
PPCII::MO_PCREL_FLAG);
return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
}
}
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return getTOCEntry(DAG, DL, GA);
}
unsigned MOHiFlag, MOLoFlag;
bool IsPIC = isPositionIndependent();
getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
if (IsPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
GSDN->getOffset(),
PPCII::MO_PIC_FLAG);
return getTOCEntry(DAG, DL, GA);
}
SDValue GAHi =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
SDValue GALo =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
}
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
ISD::CondCode CC =
cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
EVT LHSVT = LHS.getValueType();
SDLoc dl(Op);
// Soften the setcc with a libcall if it is fp128.
if (LHSVT == MVT::f128) {
assert(!Subtarget.hasP9Vector() &&
"SETCC for f128 is already legal under Power9!");
softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
Op->getOpcode() == ISD::STRICT_FSETCCS);
if (RHS.getNode())
LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
DAG.getCondCode(CC));
if (IsStrict)
return DAG.getMergeValues({LHS, Chain}, dl);
return LHS;
}
assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
if (Op.getValueType() == MVT::v2i64) {
// When the operands themselves are v2i64 values, we need to do something
// special because VSX has no underlying comparison operations for these.
if (LHS.getValueType() == MVT::v2i64) {
// Equality can be handled by casting to the legal type for Altivec
// comparisons, everything else needs to be expanded.
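// A v2i64 lane is equal iff both of its 32-bit halves compare equal, so
// below the v4i32 compare result is ANDed with its word-swapped self
// (ORed for SETNE, where either differing half suffices).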
if (CC != ISD::SETEQ && CC != ISD::SETNE)
return SDValue();
SDValue SetCC32 = DAG.getSetCC(
dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
int ShuffV[] = {1, 0, 3, 2};
SDValue Shuff =
DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
return DAG.getBitcast(MVT::v2i64,
DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
dl, MVT::v4i32, Shuff, SetCC32));
}
// We handle most of these in the usual way.
return Op;
}
// If we're comparing for equality to zero, expose the fact that this is
// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
// fold the new nodes.
if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
return V;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
// Leave comparisons against 0 and -1 alone for now, since they're usually
// optimized. FIXME: revisit this when we can custom lower all setcc
// optimizations.
if (C->isAllOnes() || C->isZero())
return SDValue();
}
// If we have an integer seteq/setne, turn it into a compare against zero
// by xor'ing the rhs with the lhs, which is faster than setting a
// condition register, reading it back out, and masking the correct bit. The
// normal approach here uses sub to do this instead of xor. Using xor exposes
// the result to other bit-twiddling opportunities.
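// E.g. "a == b" becomes setcc (xor a, b), 0, seteq, which later combines
// can turn into a cntlzw/srwi pair instead of an mfcr-based sequence.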
if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
EVT VT = Op.getValueType();
SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
}
return SDValue();
}
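// Lower va_arg for 32-bit SVR4. A rough sketch of the logic built below,
// assuming the va_list layout documented in LowerVASTART:
//   if (index < 8)  addr = reg_save_area + index * size (+ 32 for FPRs);
//   else          { addr = overflow_area; overflow_area += size; }
//   index += (VT == i64 ? 2 : 1);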
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
EVT VT = Node->getValueType(0);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue InChain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
SDLoc dl(Node);
assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
// gpr_index
SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
VAListPtr, MachinePointerInfo(SV), MVT::i8);
InChain = GprIndex.getValue(1);
if (VT == MVT::i64) {
// Check whether GprIndex is odd (i64 arguments must start in an even GPR)
SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
DAG.getConstant(1, dl, MVT::i32));
SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
DAG.getConstant(1, dl, MVT::i32));
// Align GprIndex to be even if it isn't
GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
GprIndex);
}
// fpr index is 1 byte after gpr
SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
DAG.getConstant(1, dl, MVT::i32));
// fpr
SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
FprPtr, MachinePointerInfo(SV), MVT::i8);
InChain = FprIndex.getValue(1);
SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
DAG.getConstant(8, dl, MVT::i32));
SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
DAG.getConstant(4, dl, MVT::i32));
// areas
SDValue OverflowArea =
DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
InChain = OverflowArea.getValue(1);
SDValue RegSaveArea =
DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
InChain = RegSaveArea.getValue(1);
// select overflow_area if index >= 8 (the register save area holds 8 entries)
SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
// adjustment constant gpr_index * 4/8
SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
VT.isInteger() ? GprIndex : FprIndex,
DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
MVT::i32));
// OurReg = RegSaveArea + RegConstant
SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
RegConstant);
// Floating types are 32 bytes into RegSaveArea
if (VT.isFloatingPoint())
OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
DAG.getConstant(32, dl, MVT::i32));
// increase {f,g}pr_index by 1 (or 2 if VT is i64)
SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
VT.isInteger() ? GprIndex : FprIndex,
DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
MVT::i32));
InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
VT.isInteger() ? VAListPtr : FprPtr,
MachinePointerInfo(SV), MVT::i8);
// determine if we should load from reg_save_area or overflow_area
SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
// increase overflow_area by 4/8 if gpr/fpr index >= 8
SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
DAG.getConstant(VT.isInteger() ? 4 : 8,
dl, MVT::i32));
OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
OverflowAreaPlusN);
InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
MachinePointerInfo(), MVT::i32);
return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
// We have to copy the entire va_list struct:
// 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
false, true, /*CI=*/nullptr, std::nullopt,
MachinePointerInfo(), MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget.isAIXABI())
report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
return Op.getOperand(0);
}
SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
assert((Op.getOpcode() == ISD::INLINEASM ||
Op.getOpcode() == ISD::INLINEASM_BR) &&
"Expecting Inline ASM node.");
// If an LR store is already known to be required then there is no point in
// checking this ASM as well.
if (MFI.isLRStoreRequired())
return Op;
// Inline ASM nodes have an optional last operand that is an incoming Flag of
// type MVT::Glue. We want to ignore this last operand if that is the case.
unsigned NumOps = Op.getNumOperands();
if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
--NumOps;
// Check all operands that may contain the LR.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
unsigned NumVals = Flags.getNumOperandRegisters();
++i; // Skip the ID value.
switch (Flags.getKind()) {
default:
llvm_unreachable("Bad flags!");
case InlineAsm::Kind::RegUse:
case InlineAsm::Kind::Imm:
case InlineAsm::Kind::Mem:
i += NumVals;
break;
case InlineAsm::Kind::Clobber:
case InlineAsm::Kind::RegDef:
case InlineAsm::Kind::RegDefEarlyClobber: {
for (; NumVals; --NumVals, ++i) {
Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
if (Reg != PPC::LR && Reg != PPC::LR8)
continue;
MFI.setLRStoreRequired();
return Op;
}
break;
}
}
}
return Op;
}
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget.isAIXABI())
report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Ty = IntPtrTy;
Entry.Node = Trmp; Args.push_back(Entry);
// TrampSize == (isPPC64 ? 48 : 40);
Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
isPPC64 ? MVT::i64 : MVT::i32);
Args.push_back(Entry);
Entry.Node = FPtr; Args.push_back(Entry);
Entry.Node = Nest; Args.push_back(Entry);
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
CallingConv::C, Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = getPointerTy(MF.getDataLayout());
SDLoc dl(Op);
if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
// For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
// We suppose the given va_list is already allocated.
//
// typedef struct {
// char gpr; /* index into the array of 8 GPRs
// * stored in the register save area
// * gpr=0 corresponds to r3,
// * gpr=1 to r4, etc.
// */
// char fpr; /* index into the array of 8 FPRs
// * stored in the register save area
// * fpr=0 corresponds to f1,
// * fpr=1 to f2, etc.
// */
// char *overflow_arg_area;
// /* location on stack that holds
// * the next overflow argument
// */
// char *reg_save_area;
// /* where r3:r10 and f1:f8 (if saved)
// * are stored
// */
// } va_list[1];
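// The stores emitted below initialize that struct, conceptually (names
// stack_args/reg_save are illustrative):
//   p->gpr = VarArgsNumGPR;              // byte 0
//   p->fpr = VarArgsNumFPR;              // byte 1
//   p->overflow_arg_area = stack_args;   // bytes 4..7
//   p->reg_save_area = reg_save;         // bytes 8..11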
SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
PtrVT);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
PtrVT);
uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
uint64_t FPROffset = 1;
SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
// Store first byte : number of int regs
SDValue firstStore =
DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
MachinePointerInfo(SV), MVT::i8);
uint64_t nextOffset = FPROffset;
SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
ConstFPROffset);
// Store second byte : number of float regs
SDValue secondStore =
DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
MachinePointerInfo(SV, nextOffset), MVT::i8);
nextOffset += StackOffset;
nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
// Store second word : arguments given on stack
SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
MachinePointerInfo(SV, nextOffset));
nextOffset += FrameOffset;
nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
// Store third word : arguments given in registers
return DAG.getStore(thirdStore, dl, FR, nextPtr,
MachinePointerInfo(SV, nextOffset));
}
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
PPC::F11, PPC::F12, PPC::F13};
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
unsigned PtrByteSize) {
unsigned ArgSize = ArgVT.getStoreSize();
if (Flags.isByVal())
ArgSize = Flags.getByValSize();
// Round up to multiples of the pointer size, except for array members,
// which are always packed.
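// For example, on PPC64 a 5-byte byval argument reserves 8 bytes, while
// an f32 member of a homogeneous float array stays packed at 4 bytes.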
if (!Flags.isInConsecutiveRegs())
ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
return ArgSize;
}
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
ISD::ArgFlagsTy Flags,
unsigned PtrByteSize) {
Align Alignment(PtrByteSize);
// Altivec parameters are padded to a 16 byte boundary.
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
Alignment = Align(16);
// ByVal parameters are aligned as requested.
if (Flags.isByVal()) {
auto BVAlign = Flags.getNonZeroByValAlign();
if (BVAlign > PtrByteSize) {
if (BVAlign.value() % PtrByteSize != 0)
llvm_unreachable(
"ByVal alignment is not a multiple of the pointer size");
Alignment = BVAlign;
}
}
// Array members are always packed to their original alignment.
if (Flags.isInConsecutiveRegs()) {
// If the array member was split into multiple registers, the first
// needs to be aligned to the size of the full type. (Except for
// ppcf128, which is only aligned as its f64 components.)
if (Flags.isSplit() && OrigVT != MVT::ppcf128)
Alignment = Align(OrigVT.getStoreSize());
else
Alignment = Align(ArgVT.getStoreSize());
}
return Alignment;
}
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers). ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
unsigned PtrByteSize, unsigned LinkageSize,
unsigned ParamAreaSize, unsigned &ArgOffset,
unsigned &AvailableFPRs,
unsigned &AvailableVRs) {
bool UseMemory = false;
// Respect alignment of argument on the stack.
Align Alignment =
CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
ArgOffset = alignTo(ArgOffset, Alignment);
// If there's no space left in the argument save area, we must
// use memory (this check also catches zero-sized arguments).
if (ArgOffset >= LinkageSize + ParamAreaSize)
UseMemory = true;
// Allocate argument on the stack.
ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
if (Flags.isInConsecutiveRegsLast())
ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
// If we overran the argument save area, we must use memory
// (this check catches arguments passed partially in memory)
if (ArgOffset > LinkageSize + ParamAreaSize)
UseMemory = true;
// However, if the argument is actually passed in an FPR or a VR,
// we don't use memory after all.
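// (For example, on 64-bit ELF the ninth f64 argument still goes in an
// FPR even though its shadow slot lies past the 64-byte parameter area.)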
if (!Flags.isByVal()) {
if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
if (AvailableFPRs > 0) {
--AvailableFPRs;
return false;
}
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
if (AvailableVRs > 0) {
--AvailableVRs;
return false;
}
}
return UseMemory;
}
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
unsigned NumBytes) {
return alignTo(NumBytes, Lowering->getStackAlign());
}
SDValue PPCTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
if (Subtarget.isAIXABI())
return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
if (Subtarget.is64BitELFABI())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
assert(Subtarget.is32BitELFABI());
return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
}
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
// 32-bit SVR4 ABI Stack Frame Layout:
// +-----------------------------------+
// +--> | Back chain |
// | +-----------------------------------+
// | | Floating-point register save area |
// | +-----------------------------------+
// | | General register save area |
// | +-----------------------------------+
// | | CR save word |
// | +-----------------------------------+
// | | VRSAVE save word |
// | +-----------------------------------+
// | | Alignment padding |
// | +-----------------------------------+
// | | Vector register save area |
// | +-----------------------------------+
// | | Local variable space |
// | +-----------------------------------+
// | | Parameter list area |
// | +-----------------------------------+
// | | LR save word |
// | +-----------------------------------+
// SP--> +--- | Back chain |
// +-----------------------------------+
//
// Specifications:
// System V Application Binary Interface PowerPC Processor Supplement
// AltiVec Technology Programming Interface Manual
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = getPointerTy(MF.getDataLayout());
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
const Align PtrAlign(4);
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
// Reserve space for the linkage area on the stack.
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, PtrAlign);
if (useSoftFloat())
CCInfo.PreAnalyzeFormalArguments(Ins);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
CCInfo.clearWasPPCF128();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// Arguments stored in registers.
if (VA.isRegLoc()) {
const TargetRegisterClass *RC;
EVT ValVT = VA.getValVT();
switch (ValVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("ValVT not supported by formal arguments Lowering");
case MVT::i1:
case MVT::i32:
RC = &PPC::GPRCRegClass;
break;
case MVT::f32:
if (Subtarget.hasP8Vector())
RC = &PPC::VSSRCRegClass;
else if (Subtarget.hasSPE())
RC = &PPC::GPRCRegClass;
else
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
if (Subtarget.hasVSX())
RC = &PPC::VSFRCRegClass;
else if (Subtarget.hasSPE())
// SPE passes doubles in GPR pairs.
RC = &PPC::GPRCRegClass;
else
RC = &PPC::F8RCRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
RC = &PPC::VRRCRegClass;
break;
case MVT::v4f32:
RC = &PPC::VRRCRegClass;
break;
case MVT::v2f64:
case MVT::v2i64:
RC = &PPC::VRRCRegClass;
break;
}
SDValue ArgValue;
// Transform the arguments stored in physical registers into
// virtual ones.
if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
assert(i + 1 < e && "No second half of double precision argument");
Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
if (!Subtarget.isLittleEndian())
std::swap(ArgValueLo, ArgValueHi);
ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
ArgValueHi);
} else {
Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
ValVT == MVT::i1 ? MVT::i32 : ValVT);
if (ValVT == MVT::i1)
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
}
InVals.push_back(ArgValue);
} else {
// Argument stored in memory.
assert(VA.isMemLoc());
// Get the extended size of the argument type on the stack
unsigned ArgSize = VA.getLocVT().getStoreSize();
// Get the actual size of the argument type
unsigned ObjSize = VA.getValVT().getStoreSize();
unsigned ArgOffset = VA.getLocMemOffset();
// Stack objects in PPC32 are right justified.
ArgOffset += ArgSize - ObjSize;
int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(
DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
}
}
// Assign locations to all of the incoming aggregate by value arguments.
// Aggregates passed by value are stored in the local variable space of the
// caller's stack frame, right above the parameter list area.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getStackSize();
MinReservedArea = std::max(MinReservedArea, LinkageSize);
// Set the size that is at least reserved in the caller of this function. A
// tail call optimized function's reserved stack space needs to be aligned so
// that taking the difference between two stack areas will result in an
// aligned stack.
MinReservedArea =
EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
// If the function takes a variable number of arguments, make a frame index
// for the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
static const MCPhysReg GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = std::size(GPArgRegs);
static const MCPhysReg FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
unsigned NumFPArgRegs = std::size(FPArgRegs);
if (useSoftFloat() || hasSPE())
NumFPArgRegs = 0;
FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
// Make room for NumGPArgRegs and NumFPArgRegs.
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
FuncInfo->setVarArgsFrameIndex(
MFI.CreateStackObject(Depth, Align(8), false));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// The fixed integer arguments of a variadic function are stored to the
// VarArgsFrameIndex on the stack so that they may be loaded by
// dereferencing the result of va_next.
for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
// Get an existing live-in vreg, or add a new one.
Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
if (!VReg)
VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
// FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
// is set.
// The double arguments are stored to the VarArgsFrameIndex
// on the stack.
for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
// Get an existing live-in vreg, or add a new one.
Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
if (!VReg)
VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
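// For example, an i32 "zeroext" argument arrives in the low half of an
// i64 GPR; we assert the upper bits via AssertZext and then truncate.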
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
EVT ObjectVT, SelectionDAG &DAG,
SDValue ArgVal,
const SDLoc &dl) const {
if (Flags.isSExt())
ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
else if (Flags.isZExt())
ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
assert(!(CallConv == CallingConv::Fast && isVarArg) &&
"fastcc not supported on varargs functions");
EVT PtrVT = getPointerTy(MF.getDataLayout());
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
const unsigned Num_GPR_Regs = std::size(GPR);
const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
const unsigned Num_VR_Regs = std::size(VR);
// Do a first pass over the arguments to determine whether the ABI
// guarantees that our caller has allocated the parameter save area
// on its stack frame. In the ELFv1 ABI, this is always the case;
// in the ELFv2 ABI, it is true if this is a vararg function or if
// any parameter is located in a stack slot.
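// For example, an ELFv2 function taking nine i64 arguments receives the
// ninth in a stack slot, so its caller must have allocated the area.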
bool HasParameterArea = !isELFv2ABI || isVarArg;
unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
unsigned NumBytes = LinkageSize;
unsigned AvailableFPRs = Num_FPR_Regs;
unsigned AvailableVRs = Num_VR_Regs;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
if (Ins[i].Flags.isNest())
continue;
if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
NumBytes, AvailableFPRs, AvailableVRs))
HasParameterArea = true;
}
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
unsigned ArgOffset = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
EVT OrigVT = Ins[ArgNo].ArgVT;
unsigned ObjSize = ObjectVT.getStoreSize();
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
if (Ins[ArgNo].isOrigArg()) {
std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[ArgNo].getOrigArgIndex();
}
// We re-align the argument offset for each argument. Under the fast calling
// convention we defer this until we know the argument will actually use a
// stack slot.
unsigned CurArgOffset;
Align Alignment;
auto ComputeArgOffset = [&]() {
/* Respect alignment of argument on the stack. */
Alignment =
CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
ArgOffset = alignTo(ArgOffset, Alignment);
CurArgOffset = ArgOffset;
};
if (CallConv != CallingConv::Fast) {
ComputeArgOffset();
/* Compute GPR index associated with argument offset. */
GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
}
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
if (CallConv == CallingConv::Fast)
ComputeArgOffset();
// ObjSize is the true size; ArgSize is ObjSize rounded up to a whole number
// of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
// Empty aggregate parameters do not take up registers. Examples:
// struct { } a;
// union { } b;
// int c[0];
// etc. However, we have to provide a place-holder in InVals, so
// pretend we have an 8-byte item at the current address for that
// purpose.
if (!ObjSize) {
int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
continue;
}
// Create a stack object covering all stack doublewords occupied
// by the argument. If the argument is (fully or partially) on
// the stack, or if the argument is fully in registers but the
// caller has allocated the parameter save anyway, we can refer
// directly to the caller's stack frame. Otherwise, create a
// local copy in our own frame.
int FI;
if (HasParameterArea ||
ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
else
FI = MFI.CreateStackObject(ArgSize, Alignment, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
// Handle aggregates smaller than 8 bytes.
if (ObjSize < PtrByteSize) {
// The value of the object is its address, which differs from the
// address of the enclosing doubleword on big-endian systems.
SDValue Arg = FIN;
if (!isLittleEndian) {
SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
}
InVals.push_back(Arg);
if (GPR_idx != Num_GPR_Regs) {
Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
FuncInfo->addLiveInAttr(VReg, Flags);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
SDValue Store =
DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
MachinePointerInfo(&*FuncArg), ObjType);
MemOps.push_back(Store);
}
// Whether we copied from a register or not, advance the offset
// into the parameter save area by a full doubleword.
ArgOffset += PtrByteSize;
continue;
}
// The value of the object is its address, which is the address of
// its first stack doubleword.
InVals.push_back(FIN);
// Store whatever pieces of the object are in registers to memory.
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
if (GPR_idx == Num_GPR_Regs)
break;
Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
FuncInfo->addLiveInAttr(VReg, Flags);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Addr = FIN;
if (j) {
SDValue Off = DAG.getConstant(j, dl, PtrVT);
Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
}
unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
SDValue Store =
DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
MachinePointerInfo(&*FuncArg, j), ObjType);
MemOps.push_back(Store);
++GPR_idx;
}
ArgOffset += ArgSize;
continue;
}
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
case MVT::i1:
case MVT::i32:
case MVT::i64:
if (Flags.isNest()) {
// The 'nest' parameter, if any, is passed in R11.
Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
break;
}
// These can be scalar arguments or elements of an integer array type
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != Num_GPR_Regs) {
Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
FuncInfo->addLiveInAttr(VReg, Flags);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
} else {
if (CallConv == CallingConv::Fast)
ComputeArgOffset();
needsLoad = true;
ArgSize = PtrByteSize;
}
if (CallConv != CallingConv::Fast || needsLoad)
ArgOffset += 8;
break;
case MVT::f32:
case MVT::f64:
// These can be scalar arguments or elements of a float array type
// passed directly. The latter are used to implement ELFv2 homogeneous
// float aggregates.
if (FPR_idx != Num_FPR_Regs) {
unsigned VReg;
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx],
Subtarget.hasP8Vector()
? &PPC::VSSRCRegClass
: &PPC::F4RCRegClass);
else
VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
? &PPC::VSFRCRegClass
: &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
} else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
// FIXME: We may want to re-enable this for CallingConv::Fast on the P8
// once we support fp <-> gpr moves.
// This can only ever happen in the presence of f32 array types,
// since otherwise we never run out of FPRs before running out
// of GPRs.
Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
FuncInfo->addLiveInAttr(VReg, Flags);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::f32) {
if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
DAG.getConstant(32, dl, MVT::i32));
ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
}
ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
} else {
if (CallConv == CallingConv::Fast)
ComputeArgOffset();
needsLoad = true;
}
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
if (CallConv != CallingConv::Fast || needsLoad) {
ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
ArgOffset += ArgSize;
if (Flags.isInConsecutiveRegsLast())
ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
}
break;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
case MVT::v1i128:
case MVT::f128:
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogeneous
// vector aggregates.
if (VR_idx != Num_VR_Regs) {
Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++VR_idx;
} else {
if (CallConv == CallingConv::Fast)
ComputeArgOffset();
needsLoad = true;
}
if (CallConv != CallingConv::Fast || needsLoad)
ArgOffset += 16;
break;
}
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type.
if (needsLoad) {
if (ObjSize < ArgSize && !isLittleEndian)
CurArgOffset += ArgSize - ObjSize;
int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
}
InVals.push_back(ArgVal);
}
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea;
if (HasParameterArea)
MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
else
MinReservedArea = LinkageSize;
// Set the size that is at least reserved in the caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
MinReservedArea =
EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes a variable number of arguments, make a frame index
// for the start of the first vararg value... for expansion of llvm.va_start.
// The ELFv2 ABI spec says:
// C programs that are intended to be *portable* across different compilers
// and architectures must use the header file <stdarg.h> to deal with variable
// argument lists.
if (isVarArg && MFI.hasVAStart()) {
int Depth = ArgOffset;
FuncInfo->setVarArgsFrameIndex(
MFI.CreateFixedObject(PtrByteSize, Depth, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by dereferencing
// the result of va_next.
for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
GPR_idx < Num_GPR_Regs; ++GPR_idx) {
Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
unsigned ParamSize) {
if (!isTailCall) return 0;
PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
unsigned CallerMinReservedArea = FI->getMinReservedArea();
int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
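// E.g. if the caller reserved 64 parameter bytes but this tail call
// needs 96, SPDiff is -32 and the stack must be grown by 32 bytes.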
// Remember only if the new adjustment is bigger.
if (SPDiff < FI->getTailCallSPDelta())
FI->setTailCallSPDelta(SPDiff);
return SPDiff;
}
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
static bool callsShareTOCBase(const Function *Caller,
const GlobalValue *CalleeGV,
const TargetMachine &TM) {
// It does not make sense to call callsShareTOCBase() with a caller that
// is PC Relative since PC Relative callers do not have a TOC.
#ifndef NDEBUG
const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
assert(!STICaller->isUsingPCRelativeCalls() &&
"PC Relative callers do not have a TOC and cannot share a TOC Base");
#endif
// Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
// don't have enough information to determine if the caller and callee share
// the same TOC base, so we have to pessimistically assume they don't for
// correctness.
if (!CalleeGV)
return false;
// If the callee is preemptable, then the static linker will use a plt-stub
// which saves the toc to the stack, and needs a nop after the call
// instruction to convert to a toc-restore.
if (!TM.shouldAssumeDSOLocal(CalleeGV))
return false;
// Functions with PC Relative enabled may clobber the TOC in the same DSO.
// We may need a TOC restore in the situation where the caller requires a
// valid TOC but the callee is PC Relative and does not.
const Function *F = dyn_cast<Function>(CalleeGV);
const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
// If we have an Alias we can try to get the function from there.
if (Alias) {
const GlobalObject *GlobalObj = Alias->getAliaseeObject();
F = dyn_cast<Function>(GlobalObj);
}
// If we still have no valid function pointer we do not have enough
// information to determine if the callee uses PC Relative calls so we must
// assume that it does.
if (!F)
return false;
// If the callee uses PC Relative we cannot guarantee that the callee won't
// clobber the TOC of the caller and so we must assume that the two
// functions do not share a TOC base.
const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
if (STICallee->isUsingPCRelativeCalls())
return false;
// If the GV is not a strong definition then we need to assume it can be
// replaced by another function at link time. The function that replaces
// it may not share the same TOC as the caller since the callee may be
// replaced by a PC Relative version of the same function.
if (!CalleeGV->isStrongDefinitionForLinker())
return false;
// The medium and large code models are expected to provide a sufficiently
// large TOC to provide all data addressing needs of a module with a
// single TOC.
if (CodeModel::Medium == TM.getCodeModel() ||
CodeModel::Large == TM.getCodeModel())
return true;
// Any explicitly-specified sections and section prefixes must also match.
// Also, if we're using -ffunction-sections, then each function is always in
// a different section (the same is true for COMDAT functions).
if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
return false;
if (const auto *F = dyn_cast<Function>(CalleeGV)) {
if (F->getSectionPrefix() != Caller->getSectionPrefix())
return false;
}
return true;
}
static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
const SmallVectorImpl<ISD::OutputArg> &Outs) {
assert(Subtarget.is64BitELFABI());
const unsigned PtrByteSize = 8;
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
const unsigned NumGPRs = std::size(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = std::size(VR);
const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
unsigned NumBytes = LinkageSize;
unsigned AvailableFPRs = NumFPRs;
unsigned AvailableVRs = NumVRs;
for (const ISD::OutputArg& Param : Outs) {
if (Param.Flags.isNest()) continue;
if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
LinkageSize, ParamAreaSize, NumBytes,
AvailableFPRs, AvailableVRs))
return true;
}
return false;
}
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
if (CB.arg_size() != CallerFn->arg_size())
return false;
auto CalleeArgIter = CB.arg_begin();
auto CalleeArgEnd = CB.arg_end();
Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
const Value* CalleeArg = *CalleeArgIter;
const Value* CallerArg = &(*CallerArgIter);
if (CalleeArg == CallerArg)
continue;
// e.g. @caller([4 x i64] %a, [4 x i64] %b) {
// tail call @callee([4 x i64] undef, [4 x i64] %b)
// }
// 1st argument of callee is undef and has the same type as caller.
if (CalleeArg->getType() == CallerArg->getType() &&
isa<UndefValue>(CalleeArg))
continue;
return false;
}
return true;
}
// Returns true if TCO is possible between the caller's and callee's
// calling conventions.
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
CallingConv::ID CalleeCC) {
// Tail calls are possible with fastcc and ccc.
auto isTailCallableCC = [] (CallingConv::ID CC){
return CC == CallingConv::C || CC == CallingConv::Fast;
};
if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
return false;
// We can safely tail call both fastcc and ccc callees from a c calling
// convention caller. If the caller is fastcc, we may have less stack space
// than a non-fastcc caller with the same signature so disable tail-calls in
// that case.
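// E.g. C->C, C->Fast and Fast->Fast are eligible; Fast->C is not.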
return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
bool isCalleeExternalSymbol) const {
bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
if (DisableSCO && !TailCallOpt) return false;
// Variadic argument functions are not supported.
if (isVarArg) return false;
// Check that the calling conventions are compatible for TCO.
if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
return false;
// Callers with any byval parameters are not supported.
if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
return false;
// Callees with any byval parameters are not supported either.
// Note: This is a quick workaround, because in some cases, e.g.
// caller's stack size > callee's stack size, we are still able to apply
// sibling call optimization. For example, gcc is able to do SCO for caller1
// in the following example, but not for caller2.
// struct test {
// long int a;
// char ary[56];
// } gTest;
// __attribute__((noinline)) int callee(struct test v, struct test *b) {
// b->a = v.a;
// return 0;
// }
// void caller1(struct test a, struct test c, struct test *b) {
// callee(gTest, b); }
// void caller2(struct test *b) { callee(gTest, b); }
if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
return false;
// If callee and caller use different calling conventions, we cannot pass
// parameters on stack since offsets for the parameter area may be different.
if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
return false;
// All variants of 64-bit ELF ABIs without PC-Relative addressing require that
// the caller and callee share the same TOC for TCO/SCO. If the caller and
// callee potentially have different TOC bases then we cannot tail call since
// we need to restore the TOC pointer after the call.
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
// We cannot guarantee this for indirect calls or calls to external functions.
// When PC-Relative addressing is used, the concept of the TOC is no longer
// applicable so this check is not required.
// Check first for indirect calls.
if (!Subtarget.isUsingPCRelativeCalls() &&
!isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
return false;
// Check if we share the TOC base.
if (!Subtarget.isUsingPCRelativeCalls() &&
!callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
return false;
// TCO allows altering callee ABI, so we don't have to check further.
if (CalleeCC == CallingConv::Fast && TailCallOpt)
return true;
if (DisableSCO) return false;
// If the callee uses the same argument list as the caller, then we can
// apply SCO in this case. If not, then we need to check whether the callee
// needs stack slots for passing arguments.
// PC Relative tail calls may not have a CallBase.
// If there is no CallBase we cannot verify if we have the same argument
// list so assume that we don't have the same argument list.
if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
needStackSlotPassParameters(Subtarget, Outs))
return false;
else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
return false;
return true;
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool PPCTargetLowering::IsEligibleForTailCallOptimization(
const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
CallingConv::ID CallerCC, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins) const {
if (!getTargetMachine().Options.GuaranteedTailCallOpt)
return false;
// Variable argument functions are not supported.
if (isVarArg)
return false;
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
// Functions containing by val parameters are not supported.
if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
return false;
// Non-PIC/GOT tail calls are supported.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return true;
// At the moment we can only do local tail calls (in same module, hidden
// or protected) if we are generating PIC.
if (CalleeGV)
return CalleeGV->hasHiddenVisibility() ||
CalleeGV->hasProtectedVisibility();
}
return false;
}
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BLA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C) return nullptr;
int Addr = C->getZExtValue();
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
SignExtend32<26>(Addr) != Addr)
return nullptr; // Top 6 bits have to be sext of immediate.
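// For example, 0x01FFFFFC (word-aligned, sign-extends to itself in 26
// bits) is representable, while 0x02000000 is not.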
return DAG
.getConstant(
(int)C->getZExtValue() >> 2, SDLoc(Op),
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
.getNode();
}
namespace {
struct TailCallArgumentInfo {
SDValue Arg;
SDValue FrameIdxOp;
int FrameIdx = 0;
TailCallArgumentInfo() = default;
};
} // end anonymous namespace
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
SelectionDAG &DAG, SDValue Chain,
const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
SDValue Arg = TailCallArgs[i].Arg;
SDValue FIN = TailCallArgs[i].FrameIdxOp;
int FI = TailCallArgs[i].FrameIdx;
// Store relative to the frame pointer.
MemOpChains.push_back(DAG.getStore(
Chain, dl, Arg, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
}
}
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
SDValue OldRetAddr, SDValue OldFP,
int SPDiff, const SDLoc &dl) {
if (SPDiff) {
// Calculate the new stack slot for the return address.
MachineFunction &MF = DAG.getMachineFunction();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const PPCFrameLowering *FL = Subtarget.getFrameLowering();
bool isPPC64 = Subtarget.isPPC64();
int SlotSize = isPPC64 ? 8 : 4;
int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(MF, NewRetAddr));
}
return Chain;
}
/// CalculateTailCallArgDest - Calculate the argument's position on the stack
/// and remember it for later processing.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SDValue Arg, int SPDiff, unsigned ArgOffset,
SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue FIN = DAG.getFrameIndex(FI, VT);
TailCallArgumentInfo Info;
Info.Arg = Arg;
Info.FrameIdxOp = FIN;
Info.FrameIdx = FI;
TailCallArguments.push_back(Info);
}
/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
/// address stack slots. Returns the chain as result and the loaded values in
/// LROpOut/FPOpOut. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
SDValue &FPOpOut, const SDLoc &dl) const {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
Chain = SDValue(LROpOut.getNode(), 1);
}
return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
/// specified by "Src" to the address "Dst" of size "Size". Alignment
/// information is specified by the specific parameter attribute. The copy
/// will be passed as a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SDValue Chain, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
return DAG.getMemcpy(
Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
/*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
static void LowerMemOpCallTo(
SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
if (!isTailCall) {
if (isVector) {
SDValue StackPtr;
if (isPPC64)
StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
else
StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
DAG.getConstant(ArgOffset, dl, PtrVT));
}
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
// Calculate and remember argument location.
} else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
TailCallArguments);
}
static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
SDValue FPOp,
SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
// might overwrite each other in case of tail call optimization.
SmallVector<SDValue, 8> MemOpChains2;
// Do not flag preceding copytoreg stuff together with the following stuff.
InGlue = SDValue();
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
MemOpChains2, dl);
if (!MemOpChains2.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
// Emit callseq_end just before tailcall node.
Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
InGlue = Chain.getValue(1);
}
// Is this global address that of a function that can be called by name? (as
// opposed to something that must hold a descriptor for an indirect call).
static bool isFunctionGlobalAddress(const GlobalValue *GV) {
if (GV) {
if (GV->isThreadLocal())
return false;
return GV->getValueType()->isFunctionTy();
}
return false;
}
SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCRetInfo.AnalyzeCallResult(
Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
? RetCC_PPC_Cold
: RetCC_PPC);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Val;
if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InGlue);
Chain = Lo.getValue(1);
InGlue = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
InGlue);
Chain = Hi.getValue(1);
InGlue = Hi.getValue(2);
if (!Subtarget.isLittleEndian())
std::swap(Lo, Hi);
Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
} else {
Val = DAG.getCopyFromReg(Chain, dl,
VA.getLocReg(), VA.getLocVT(), InGlue);
Chain = Val.getValue(1);
InGlue = Val.getValue(2);
}
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::AExt:
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
break;
case CCValAssign::ZExt:
Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
DAG.getValueType(VA.getValVT()));
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
break;
case CCValAssign::SExt:
Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
DAG.getValueType(VA.getValVT()));
Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
break;
}
InVals.push_back(Val);
}
return Chain;
}
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
const PPCSubtarget &Subtarget, bool isPatchPoint) {
auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
const GlobalValue *GV = G ? G->getGlobal() : nullptr;
// PatchPoint calls are not indirect.
if (isPatchPoint)
return false;
if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
return false;
// Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
// because the immediate function pointer points to a descriptor instead of
// a function entry point. The ELFv2 ABI cannot use a BLA because the function
// pointer immediate points to the global entry point, while the BLA would
// need to jump to the local entry point (see rL211174).
if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
isBLACompatibleAddress(Callee, DAG))
return false;
return true;
}
// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
return Subtarget.isAIXABI() ||
(Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
}
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
const Function &Caller, const SDValue &Callee,
const PPCSubtarget &Subtarget,
const TargetMachine &TM,
bool IsStrictFPCall = false) {
if (CFlags.IsTailCall)
return PPCISD::TC_RETURN;
unsigned RetOpc = 0;
// This is a call through a function pointer.
if (CFlags.IsIndirect) {
// AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
// indirect calls. The save of the caller's TOC pointer to the stack will be
// inserted into the DAG as part of call lowering. The restore of the TOC
// pointer is modeled by using a pseudo instruction for the call opcode that
// represents the 2 instruction sequence of an indirect branch and link,
// immediately followed by a load of the TOC pointer from the stack save
// slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
// as it is not saved or used.
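// As a rough sketch (illustrative, not the exact emitted sequence), the
// pseudo expands to:
//   bctrl             # indirect branch and link through CTR
//   ld r2, Off(r1)    # reload the TOC pointer from the stack save slot;
//                     # Off is 24 under ELFv2 and 40 under ELFv1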
RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
: PPCISD::BCTRL;
} else if (Subtarget.isUsingPCRelativeCalls()) {
assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
RetOpc = PPCISD::CALL_NOTOC;
} else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
// The ABIs that maintain a TOC pointer across calls need to have a nop
// immediately following the call instruction if the caller and callee may
// have different TOC bases. At link time if the linker determines the calls
// may not share a TOC base, the call is redirected to a trampoline inserted
// by the linker. The trampoline will (among other things) save the caller's
// TOC pointer at an ABI designated offset in the linkage area and the
// linker will rewrite the nop to be a load of the TOC pointer from the
// linkage area into gpr2.
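// Illustrative example only: a cross-TOC call on 64-bit ELF starts out as
//   bl callee
//   nop
// and the linker may rewrite it, via a trampoline, into something like
//   bl callee.trampoline
//   ld r2, 24(r1)     # TOC save slot; 40(r1) under ELFv1
// where "callee.trampoline" is a made-up name for the inserted stub.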
auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
const GlobalValue *GV = G ? G->getGlobal() : nullptr;
RetOpc =
callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
} else
RetOpc = PPCISD::CALL;
if (IsStrictFPCall) {
switch (RetOpc) {
default:
llvm_unreachable("Unknown call opcode");
case PPCISD::BCTRL_LOAD_TOC:
RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
break;
case PPCISD::BCTRL:
RetOpc = PPCISD::BCTRL_RM;
break;
case PPCISD::CALL_NOTOC:
RetOpc = PPCISD::CALL_NOTOC_RM;
break;
case PPCISD::CALL:
RetOpc = PPCISD::CALL_RM;
break;
case PPCISD::CALL_NOP:
RetOpc = PPCISD::CALL_NOP_RM;
break;
}
}
return RetOpc;
}
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
const SDLoc &dl, const PPCSubtarget &Subtarget) {
if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
return SDValue(Dest, 0);
// Returns true if the callee is local, and false otherwise.
auto isLocalCallee = [&]() {
const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
const GlobalValue *GV = G ? G->getGlobal() : nullptr;
return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
!isa_and_nonnull<GlobalIFunc>(GV);
};
// The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
// a static relocation model causes some versions of GNU LD (2.17.50, at
// least) to force BSS-PLT, instead of secure-PLT, even if all objects are
// built with secure-PLT.
bool UsePlt =
Subtarget.is32BitELFABI() && !isLocalCallee() &&
Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
const TargetMachine &TM = Subtarget.getTargetMachine();
const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
MCSymbolXCOFF *S =
cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
return DAG.getMCSymbol(S, PtrVT);
};
auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
const GlobalValue *GV = G ? G->getGlobal() : nullptr;
if (isFunctionGlobalAddress(GV)) {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
if (Subtarget.isAIXABI()) {
assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
return getAIXFuncEntryPointSymbolSDNode(GV);
}
return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
UsePlt ? PPCII::MO_PLT : 0);
}
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *SymName = S->getSymbol();
if (Subtarget.isAIXABI()) {
// If there exists a user-declared function whose name is the same as the
// ExternalSymbol's, then we pick up the user-declared version.
const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
if (const Function *F =
dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
return getAIXFuncEntryPointSymbolSDNode(F);
// On AIX, direct function calls reference the symbol for the function's
// entry point, which is named by prepending a "." before the function's
// C-linkage name. A Qualname is returned here because an external
// function entry point is a csect with XTY_ER property.
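// For example, a direct call to a function "foo" references the entry-point
// symbol ".foo", while the unadorned name "foo" labels its descriptor.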
const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
auto &Context = DAG.getMachineFunction().getContext();
MCSectionXCOFF *Sec = Context.getXCOFFSection(
(Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
return Sec->getQualNameSymbol();
};
SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
}
return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
UsePlt ? PPCII::MO_PLT : 0);
}
// No transformation needed.
assert(Callee.getNode() && "What, no callee?");
return Callee;
}
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
"Expected a CALLSEQ_STARTSDNode.");
// The last operand is the chain, except when the node has glue. If the node
// has glue, then the last operand is the glue, and the chain is the second
// last operand.
SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
if (LastValue.getValueType() != MVT::Glue)
return LastValue;
return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
}
// Creates the node that moves a function's address into the count register
// to prepare for an indirect call instruction.
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
SDValue &Glue, SDValue &Chain,
const SDLoc &dl) {
SDValue MTCTROps[] = {Chain, Callee, Glue};
EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
// The glue is the second value produced.
Glue = Chain.getValue(1);
}
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
SDValue &Glue, SDValue &Chain,
SDValue CallSeqStart,
const CallBase *CB, const SDLoc &dl,
bool hasNest,
const PPCSubtarget &Subtarget) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
// The function descriptor is a three doubleword structure with the
// following fields: function entry point, TOC base address and
// environment pointer.
// Thus for a call through a function pointer, the following actions need
// to be performed:
// 1. Save the TOC of the caller in the TOC save area of its stack
// frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
// 2. Load the address of the function entry point from the function
// descriptor.
// 3. Load the TOC of the callee from the function descriptor into r2.
// 4. Load the environment pointer from the function descriptor into
// r11.
// 5. Branch to the function entry point address.
// 6. On return of the callee, the TOC of the caller needs to be
// restored (this is done in FinishCall()).
//
// The loads are scheduled at the beginning of the call sequence, and the
// register copies are flagged together to ensure that no other
// operations can be scheduled in between. E.g. without flagging the
// copies together, a TOC access in the caller could be scheduled between
// the assignment of the callee TOC and the branch to the callee, which leads
// to incorrect code.
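// As an illustrative sketch (field names invented here, offsets are the
// subtarget values used below), the descriptor can be pictured as:
//   struct FunctionDescriptor {
//     uint64_t EntryPoint; // offset 0
//     uint64_t TOCBase;    // offset descriptorTOCAnchorOffset()
//     uint64_t EnvPointer; // offset descriptorEnvironmentPointerOffset()
//   };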
// Start by loading the function address from the descriptor.
SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
? (MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant)
: MachineMemOperand::MONone;
MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
// Registers used in building the DAG.
const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
// Offsets of descriptor members.
const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
// One load for the function's entry point address.
SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
Alignment, MMOFlags);
// One for loading the TOC anchor for the module that contains the called
// function.
SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
SDValue TOCPtr =
DAG.getLoad(RegVT, dl, LDChain, AddTOC,
MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
// One for loading the environment pointer.
SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
SDValue LoadEnvPtr =
DAG.getLoad(RegVT, dl, LDChain, AddPtr,
MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
// Then copy the newly loaded TOC anchor to the TOC pointer.
SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
Chain = TOCVal.getValue(0);
Glue = TOCVal.getValue(1);
// If the function call has an explicit 'nest' parameter, it takes the
// place of the environment pointer.
assert((!hasNest || !Subtarget.isAIXABI()) &&
"Nest parameter is not supported on AIX.");
if (!hasNest) {
SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
Chain = EnvVal.getValue(0);
Glue = EnvVal.getValue(1);
}
// The rest of the indirect call sequence is the same as the non-descriptor
// DAG.
prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
static void
buildCallOperands(SmallVectorImpl<SDValue> &Ops,
PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
const PPCSubtarget &Subtarget) {
const bool IsPPC64 = Subtarget.isPPC64();
// MVT for a general purpose register.
const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
// First operand is always the chain.
Ops.push_back(Chain);
// If it's a direct call pass the callee as the second operand.
if (!CFlags.IsIndirect)
Ops.push_back(Callee);
else {
assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
// For the TOC based ABIs, we have saved the TOC pointer to the linkage area
// on the stack (this would have been done in `LowerCall_64SVR4` or
// `LowerCall_AIX`). The call instruction is a pseudo instruction that
// represents both the indirect branch and a load that restores the TOC
// pointer from the linkage area. The operand for the TOC restore is an add
// of the TOC save offset to the stack pointer. This must be the second
// operand: after the chain input but before any other variadic arguments.
// For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
// saved or used.
if (isTOCSaveRestoreRequired(Subtarget)) {
const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
Ops.push_back(AddTOC);
}
// Add the register used for the environment pointer.
if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
RegVT));
// Add CTR register as callee so a bctr can be emitted later.
if (CFlags.IsTailCall)
Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
}
// If this is a tail call add stack pointer delta.
if (CFlags.IsTailCall)
Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
// no way to mark dependencies as implicit here.
// We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
!CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask =
TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
// If the glue is valid, it is the last operand.
if (Glue.getNode())
Ops.push_back(Glue);
}
SDValue PPCTargetLowering::FinishCall(
CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
Subtarget.isAIXABI())
setUsesTOCBasePtr(DAG);
unsigned CallOpc =
getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
if (!CFlags.IsIndirect)
Callee = transformCallee(Callee, DAG, dl, Subtarget);
else if (Subtarget.usesFunctionDescriptors())
prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
dl, CFlags.HasNest, Subtarget);
else
prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
// Build the operand list for the call instruction.
SmallVector<SDValue, 8> Ops;
buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
SPDiff, Subtarget);
// Emit tail call.
if (CFlags.IsTailCall) {
// Indirect tail calls when using PC Relative calls do not have the same
// constraints.
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress ||
isa<ConstantSDNode>(Callee) ||
(CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
"Expecting a global address, external symbol, absolute value, "
"register or an indirect tail call when PC Relative calls are "
"used.");
// PC Relative calls also use TC_RETURN as the way to mark tail calls.
assert(CallOpc == PPCISD::TC_RETURN &&
"Unexpected call opcode for a tail call.");
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
return Ret;
}
std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
Glue = Chain.getValue(1);
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCFrameLowering::eliminateCallFramePseudoInstr.
int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt)
? NumBytes
: 0;
Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
Glue = Chain.getValue(1);
return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
DAG, InVals);
}
bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
CallingConv::ID CalleeCC = CB->getCallingConv();
const Function *CallerFunc = CB->getCaller();
CallingConv::ID CallerCC = CallerFunc->getCallingConv();
const Function *CalleeFunc = CB->getCalledFunction();
if (!CalleeFunc)
return false;
const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
SmallVector<ISD::OutputArg, 2> Outs;
SmallVector<ISD::InputArg, 2> Ins;
GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
CalleeFunc->getAttributes(), Outs, *this,
CalleeFunc->getDataLayout());
return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
false /*isCalleeExternalSymbol*/);
}
bool PPCTargetLowering::isEligibleForTCO(
const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
bool isCalleeExternalSymbol) const {
if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
return false;
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
return IsEligibleForTailCallOptimization_64SVR4(
CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
isCalleeExternalSymbol);
else
return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
isVarArg, Ins);
}
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
bool isPatchPoint = CLI.IsPatchPoint;
const CallBase *CB = CLI.CB;
if (isTailCall) {
MachineFunction &MF = DAG.getMachineFunction();
CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
const GlobalValue *GV = G ? G->getGlobal() : nullptr;
bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
isTailCall =
isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
&(MF.getFunction()), IsCalleeExternalSymbol);
if (isTailCall) {
++NumTailCalls;
if (!getTargetMachine().Options.GuaranteedTailCallOpt)
++NumSiblingCalls;
// PC Relative calls no longer guarantee that the callee is a Global
// Address Node. The callee could be an indirect tail call in which
// case the SDValue for the callee could be a load (to load the address
// of a function pointer) or it may be a register copy (to move the
// address of the callee from a function parameter into a virtual
// register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
assert((Subtarget.isUsingPCRelativeCalls() ||
isa<GlobalAddressSDNode>(Callee)) &&
"Callee should be an llvm::Function object.");
LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
<< "\nTCO callee: ");
LLVM_DEBUG(Callee.dump());
}
}
if (!isTailCall && CB && CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// When long calls (i.e. indirect calls) are always used, calls are always
// made via a function pointer. If we have a function name, first translate it
// into a pointer.
if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
!isTailCall)
Callee = LowerGlobalAddress(Callee, DAG);
CallFlags CFlags(
CallConv, isTailCall, isVarArg, isPatchPoint,
isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
// hasNest
Subtarget.is64BitELFABI() &&
any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
CLI.NoMerge);
if (Subtarget.isAIXABI())
return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
InVals, CB);
assert(Subtarget.isSVR4ABI());
if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
InVals, CB);
return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
InVals, CB);
}
SDValue PPCTargetLowering::LowerCall_32SVR4(
SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
const CallBase *CB) const {
// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
const CallingConv::ID CallConv = CFlags.CallConv;
const bool IsVarArg = CFlags.IsVarArg;
const bool IsTailCall = CFlags.IsTailCall;
assert((CallConv == CallingConv::C ||
CallConv == CallingConv::Cold ||
CallConv == CallingConv::Fast) && "Unknown calling convention!");
const Align PtrAlign(4);
MachineFunction &MF = DAG.getMachineFunction();
// Mark this function as potentially containing a tail call. As a
// consequence, the frame pointer will be used for dynamic allocation and
// for restoring the caller's stack pointer in this function's epilog. This
// is done because the tail-called function might overwrite the value in
// this function's (MF) stack pointer stack slot 0(SP).
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
// area, parameter list area and the part of the local variable space which
// contains copies of aggregates which are passed by value.
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
PtrAlign);
if (useSoftFloat())
CCInfo.PreAnalyzeCallOperands(Outs);
if (IsVarArg) {
// Handle fixed and variable vector arguments differently.
// Fixed vector arguments go into registers as long as registers are
// available. Variable vector arguments always go into memory.
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool Result;
if (Outs[i].IsFixed) {
Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
CCInfo);
} else {
Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo);
}
if (Result) {
#ifndef NDEBUG
errs() << "Call operand #" << i << " has unhandled type "
<< ArgVT << "\n";
#endif
llvm_unreachable(nullptr);
}
}
} else {
// All arguments are treated the same.
CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
}
CCInfo.clearWasPPCF128();
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
// Size of the linkage area, parameter list area and the part of the local
// variable space where copies of aggregates which are passed by value are
// stored.
unsigned NumBytes = CCByValInfo.getStackSize();
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so they can be moved somewhere
// else later.
SDValue LROp, FPOp;
Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
SmallVector<SDValue, 8> MemOpChains;
bool seenFloatArg = false;
// Walk the register/memloc assignments, inserting copies/loads.
// i - Tracks the index into the list of registers allocated for the call
// RealArgIdx - Tracks the index into the list of actual function arguments
// j - Tracks the index into the list of byval arguments
for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
i != e;
++i, ++RealArgIdx) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[RealArgIdx];
ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
if (Flags.isByVal()) {
// Argument is an aggregate which is passed by value, thus we need to
// create a copy of it in the local variable space of the current stack
// frame (which is the stack frame of the caller) and pass the address of
// this copy to the callee.
assert((j < ByValArgLocs.size()) && "Index out of bounds!");
CCValAssign &ByValVA = ByValArgLocs[j++];
assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
// Memory reserved in the local variable space of the caller's stack frame.
unsigned LocMemOffset = ByValVA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
StackPtr, PtrOff);
// Create a copy of the argument in the local area of the current
// stack frame.
SDValue MemcpyCall =
CreateCopyOfByValArgument(Arg, PtrOff,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
// This must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
Chain = CallSeqStart = NewCallSeqStart;
// Pass the address of the aggregate copy on the stack either in a
// physical register or in the parameter list area of the current stack
// frame to the callee.
Arg = PtrOff;
}
// When useCRBits() is true, there can be i1 arguments.
// This is because getRegisterType(MVT::i1) => MVT::i1,
// while for other integer types getRegisterType() => MVT::i32.
// Extend i1 here to ensure the callee gets an i32.
if (Arg.getValueType() == MVT::i1)
Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
dl, MVT::i32, Arg);
if (VA.isRegLoc()) {
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
bool IsLE = Subtarget.isLittleEndian();
SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
SVal.getValue(0)));
} else
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
// Put argument in the parameter list area of the current stack frame.
assert(VA.isMemLoc());
unsigned LocMemOffset = VA.getLocMemOffset();
if (!IsTailCall) {
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
} else {
// Calculate and remember argument location.
CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
TailCallArguments);
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InGlue);
InGlue = Chain.getValue(1);
}
// Set CR bit 6 to true if this is a vararg call with floating args passed in
// registers.
if (IsVarArg) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, InGlue };
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
InGlue = Chain.getValue(1);
}
if (IsTailCall)
PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
TailCallArguments);
return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs.
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) const {
SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
// The MEMCPY must go outside the CALLSEQ_START..END.
int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
return NewCallSeqStart;
}
SDValue PPCTargetLowering::LowerCall_64SVR4(
SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
const CallBase *CB) const {
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
bool IsSibCall = false;
bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
unsigned PtrByteSize = 8;
MachineFunction &MF = DAG.getMachineFunction();
if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
IsSibCall = true;
// Mark this function as potentially containing a tail call. As a
// consequence, the frame pointer will be used for dynamic allocation and
// for restoring the caller's stack pointer in this function's epilog. This
// is done because the tail-called function might overwrite the value in
// this function's (MF) stack pointer stack slot 0(SP).
if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
assert(!(IsFastCall && CFlags.IsVarArg) &&
"fastcc not supported on varargs functions");
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
// reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
// area is 32 bytes reserved space for [SP][CR][LR][TOC].
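// In byte offsets from the stack pointer that is: ELFv1 keeps the back
// chain at 0, CR save at 8, LR save at 16, two reserved words at 24 and 32,
// and the TOC save at 40; ELFv2 keeps the back chain at 0, CR save at 8,
// LR save at 16, and the TOC save at 24.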
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
const unsigned NumGPRs = std::size(GPR);
const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
const unsigned NumVRs = std::size(VR);
// On ELFv2, we can avoid allocating the parameter area if all the arguments
// can be passed to the callee in registers.
// For the fast calling convention, there is another check below.
// Note: We should keep this consistent with LowerFormalArguments_64SVR4().
bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
if (!HasParameterArea) {
unsigned ParamAreaSize = NumGPRs * PtrByteSize;
unsigned AvailableFPRs = NumFPRs;
unsigned AvailableVRs = NumVRs;
unsigned NumBytesTmp = NumBytes;
for (unsigned i = 0; i != NumOps; ++i) {
if (Outs[i].Flags.isNest()) continue;
if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
NumBytesTmp, AvailableFPRs, AvailableVRs))
HasParameterArea = true;
}
}
// When using the fast calling convention, we don't provide backing for
// arguments that will be in registers.
unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
// Avoid allocating parameter area for fastcc functions if all the arguments
// can be passed in the registers.
if (IsFastCall)
HasParameterArea = false;
// Add up all the space actually used.
for (unsigned i = 0; i != NumOps; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
if (Flags.isNest())
continue;
if (IsFastCall) {
if (Flags.isByVal()) {
NumGPRsUsed += (Flags.getByValSize()+7)/8;
if (NumGPRsUsed > NumGPRs)
HasParameterArea = true;
} else {
switch (ArgVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i1:
case MVT::i32:
case MVT::i64:
if (++NumGPRsUsed <= NumGPRs)
continue;
break;
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
case MVT::v1i128:
case MVT::f128:
if (++NumVRsUsed <= NumVRs)
continue;
break;
case MVT::v4f32:
if (++NumVRsUsed <= NumVRs)
continue;
break;
case MVT::f32:
case MVT::f64:
if (++NumFPRsUsed <= NumFPRs)
continue;
break;
}
HasParameterArea = true;
}
}
/* Respect alignment of argument on the stack. */
auto Alignment =
CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
NumBytes = alignTo(NumBytes, Alignment);
NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
if (Flags.isInConsecutiveRegsLast())
NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
}
unsigned NumBytesActuallyUsed = NumBytes;
// In the old ELFv1 ABI, the prolog code of the callee may store up to 8 GPR
// argument registers to the stack, allowing va_start to index over them in
// memory if the callee is varargs.
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
// In the ELFv2 ABI, we allocate the parameter area iff a callee
// really requires memory operands, e.g. a vararg function.
if (HasParameterArea)
NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
else
NumBytes = LinkageSize;
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
int SPDiff = 0;
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
if (!IsSibCall)
SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
// To protect arguments on the stack from being clobbered in a tail call,
// force all the loads to happen before doing any other lowering.
if (CFlags.IsTailCall)
Chain = DAG.getStackArgumentTokenFactor(Chain);
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so they can be moved somewhere
// else later.
SDValue LROp, FPOp;
Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
// Figure out which arguments are going to go in registers, and which in
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
unsigned ArgOffset = LinkageSize;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
SmallVector<SDValue, 8> MemOpChains;
for (unsigned i = 0; i != NumOps; ++i) {
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff;
// We re-align the argument offset for each argument, except when using the
// fast calling convention, when we need to make sure we do that only when
// we'll actually use a stack slot.
auto ComputePtrOff = [&]() {
/* Respect alignment of argument on the stack. */
auto Alignment =
CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
ArgOffset = alignTo(ArgOffset, Alignment);
PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
};
if (!IsFastCall) {
ComputePtrOff();
/* Compute GPR index associated with argument offset. */
GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
GPR_idx = std::min(GPR_idx, NumGPRs);
}
// Promote integers to 64-bit values.
if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
// FIXME: Should this use ANY_EXTEND if neither sext nor zext?
unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
}
// FIXME memcpy is used way more than necessary. Correctness first.
// Note: "by value" is code for passing a structure by value, not
// basic types.
if (Flags.isByVal()) {
// Note: Size includes alignment padding, so
// struct x { short a; char b; }
// will have Size = 4. With #pragma pack(1), it will have Size = 3.
// These are the proper values we need for right-justifying the
// aggregate in a parameter register.
unsigned Size = Flags.getByValSize();
// An empty aggregate parameter takes up no storage and no
// registers.
if (Size == 0)
continue;
if (IsFastCall)
ComputePtrOff();
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
continue;
}
}
if (GPR_idx == NumGPRs && Size < 8) {
SDValue AddPtr = PtrOff;
if (!isLittleEndian) {
SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
PtrOff.getValueType());
AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
}
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
ArgOffset += PtrByteSize;
continue;
}
// Copy the object to the parameter save area if it cannot be entirely
// passed in registers.
// FIXME: we only need to copy the parts which need to be passed in
// parameter save area. For the parts passed by registers, we don't need
// to copy them to the stack although we need to allocate space for them
// in parameter save area.
if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
CallSeqStart,
Flags, DAG, dl);
// When a register is available, pass a small aggregate right-justified.
if (Size < 8 && GPR_idx != NumGPRs) {
// The easiest way to get this right-justified in a register
// is to copy the structure into the rightmost portion of a
// local variable slot, then load the whole slot into the
// register.
// FIXME: The memcpy seems to produce pretty awful code for
// small aggregates, particularly for packed ones.
// FIXME: It would be preferable to use the slot in the
// parameter save area instead of a new local variable.
SDValue AddPtr = PtrOff;
if (!isLittleEndian) {
SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
}
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
// Load the slot into the register.
SDValue Load =
DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
continue;
}
// For aggregates larger than PtrByteSize, copy the pieces of the
// object that fit into registers from the parameter save area.
for (unsigned j=0; j<Size; j+=PtrByteSize) {
SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
MachinePointerInfo(), ObjType);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
} else {
ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
break;
}
}
continue;
}
switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i1:
case MVT::i32:
case MVT::i64:
if (Flags.isNest()) {
// The 'nest' parameter, if any, is passed in R11.
RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
break;
}
// These can be scalar arguments or elements of an integer array type
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != NumGPRs) {
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
if (IsFastCall)
ComputePtrOff();
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
if (IsFastCall)
ArgOffset += PtrByteSize;
}
if (!IsFastCall)
ArgOffset += PtrByteSize;
break;
case MVT::f32:
case MVT::f64: {
// These can be scalar arguments or elements of a float array type
// passed directly. The latter are used to implement ELFv2 homogeneous
// float aggregates.
// Named arguments go into FPRs first, and once they overflow, the
// remaining arguments go into GPRs and then the parameter save area.
// Unnamed arguments for vararg functions always go to GPRs and
// then the parameter save area. For now, put all arguments to vararg
// routines always in both locations (FPR *and* GPR or stack slot).
bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
bool NeededLoad = false;
// First load the argument into the next available FPR.
if (FPR_idx != NumFPRs)
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
// Next, load the argument into GPR or stack slot if needed.
if (!NeedGPROrStack)
;
else if (GPR_idx != NumGPRs && !IsFastCall) {
// FIXME: We may want to re-enable this for CallingConv::Fast on the P8
// once we support fp <-> gpr moves.
// In the non-vararg case, this can only ever happen in the
// presence of f32 array types, since otherwise we never run
// out of FPRs before running out of GPRs.
SDValue ArgVal;
// Double values are always passed in a single GPR.
if (Arg.getValueType() != MVT::f32) {
ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
// Non-array float values are extended and passed in a GPR.
} else if (!Flags.isInConsecutiveRegs()) {
ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
// If we have an array of floats, we collect every odd element
// together with its predecessor into one GPR.
} else if (ArgOffset % PtrByteSize != 0) {
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
if (!isLittleEndian)
std::swap(Lo, Hi);
ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
// The final element, if even, goes into the first half of a GPR.
} else if (Flags.isInConsecutiveRegsLast()) {
ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
if (!isLittleEndian)
ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
DAG.getConstant(32, dl, MVT::i32));
// Non-final even elements are skipped; they will be handled
// together with the subsequent argument on the next go-around.
} else
ArgVal = SDValue();
if (ArgVal.getNode())
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
} else {
if (IsFastCall)
ComputePtrOff();
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
if (Arg.getValueType() == MVT::f32 &&
!isLittleEndian && !Flags.isInConsecutiveRegs()) {
SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
NeededLoad = true;
}
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
if (!IsFastCall || NeededLoad) {
ArgOffset += (Arg.getValueType() == MVT::f32 &&
Flags.isInConsecutiveRegs()) ? 4 : 8;
if (Flags.isInConsecutiveRegsLast())
ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
}
break;
}
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
case MVT::v1i128:
case MVT::f128:
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogeneous
// vector aggregates.
// For a varargs call, named arguments go into VRs or on the stack as
// usual; unnamed arguments always go to the stack or the corresponding
// GPRs when within range. For now, we always put the value in both
// locations (or even all three).
if (CFlags.IsVarArg) {
assert(HasParameterArea &&
"Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
SDValue Store =
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
MemOpChains.push_back(Store);
if (VR_idx != NumVRs) {
SDValue Load =
DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
}
ArgOffset += 16;
for (unsigned i=0; i<16; i+=PtrByteSize) {
if (GPR_idx == NumGPRs)
break;
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
DAG.getConstant(i, dl, PtrVT));
SDValue Load =
DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
break;
}
// Non-varargs Altivec params go into VRs or on the stack.
if (VR_idx != NumVRs) {
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
} else {
if (IsFastCall)
ComputePtrOff();
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, CFlags.IsTailCall, true, MemOpChains,
TailCallArguments, dl);
if (IsFastCall)
ArgOffset += 16;
}
if (!IsFastCall)
ArgOffset += 16;
break;
}
}
assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
"mismatch in size of parameter area");
(void)NumBytesActuallyUsed;
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Check if this is an indirect call (MTCTR/BCTRL).
// See prepareDescriptorIndirectCall and buildCallOperands for more
// information about calls through function pointers in the 64-bit SVR4 ABI.
if (CFlags.IsIndirect) {
// For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
// caller in the TOC save area.
if (isTOCSaveRestoreRequired(Subtarget)) {
assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
// Load r2 into a virtual register and store it to the TOC save area.
setUsesTOCBasePtr(DAG);
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
MachinePointerInfo::getStack(
DAG.getMachineFunction(), TOCSaveOffset));
}
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
// to model this as an extra parameter, so do that.
if (isELFv2ABI && !CFlags.IsPatchPoint)
RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InGlue);
InGlue = Chain.getValue(1);
}
if (CFlags.IsTailCall && !IsSibCall)
PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
TailCallArguments);
return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
// Returns true when the shadow of a general purpose argument register
// in the parameter save area is aligned to at least 'RequiredAlign'.
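// For example, on 64-bit AIX the PSA begins 48 bytes past the stack
// pointer, so X3 shadows offset 48 (16-byte aligned), X4 shadows offset 56
// (only 8-byte aligned), X5 shadows offset 64 (16-byte aligned), and so on.
// On 32-bit AIX the PSA begins at offset 24, which is why R3's shadow is
// only 8-byte aligned and R4's only 4-byte aligned.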
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
assert(RequiredAlign.value() <= 16 &&
"Required alignment greater than stack alignment.");
switch (Reg) {
default:
report_fatal_error("called on invalid register.");
case PPC::R5:
case PPC::R9:
case PPC::X3:
case PPC::X5:
case PPC::X7:
case PPC::X9:
// These registers are 16-byte aligned, which is the strictest alignment
// we can support.
return true;
case PPC::R3:
case PPC::R7:
case PPC::X4:
case PPC::X6:
case PPC::X8:
case PPC::X10:
// The shadow of these registers in the PSA is 8 byte aligned.
return RequiredAlign <= 8;
case PPC::R4:
case PPC::R6:
case PPC::R8:
case PPC::R10:
return RequiredAlign <= 4;
}
}
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &S) {
AIXCCState &State = static_cast<AIXCCState &>(S);
const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
State.getMachineFunction().getSubtarget());
const bool IsPPC64 = Subtarget.isPPC64();
const unsigned PtrSize = IsPPC64 ? 8 : 4;
const Align PtrAlign(PtrSize);
const Align StackAlign(16);
const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
if (ValVT == MVT::f128)
report_fatal_error("f128 is unimplemented on AIX.");
if (ArgFlags.isNest())
report_fatal_error("Nest arguments are unimplemented.");
static const MCPhysReg GPR_32[] = {// 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10};
static const MCPhysReg GPR_64[] = {// 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10};
static const MCPhysReg VR[] = {// Vector registers.
PPC::V2, PPC::V3, PPC::V4, PPC::V5,
PPC::V6, PPC::V7, PPC::V8, PPC::V9,
PPC::V10, PPC::V11, PPC::V12, PPC::V13};
const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
if (ArgFlags.isByVal()) {
const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
if (ByValAlign > StackAlign)
report_fatal_error("Pass-by-value arguments with alignment greater than "
"16 are not supported.");
const unsigned ByValSize = ArgFlags.getByValSize();
const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
// An empty aggregate parameter takes up no storage and no registers,
// but needs a MemLoc for a stack slot for the formal arguments side.
if (ByValSize == 0) {
State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
State.getStackSize(), RegVT, LocInfo));
return false;
}
// Shadow allocate any registers that are not properly aligned.
unsigned NextReg = State.getFirstUnallocated(GPRs);
while (NextReg != GPRs.size() &&
!isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
// Shadow allocate the next register since its alignment is not strict enough.
unsigned Reg = State.AllocateReg(GPRs);
// Allocate the stack space shadowed by said register.
State.AllocateStack(PtrSize, PtrAlign);
assert(Reg && "Allocating register unexpectedly failed.");
(void)Reg;
NextReg = State.getFirstUnallocated(GPRs);
}
const unsigned StackSize = alignTo(ByValSize, ObjAlign);
unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
if (unsigned Reg = State.AllocateReg(GPRs))
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
else {
State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
LocInfo));
break;
}
}
return false;
}
// Arguments always reserve space in the parameter save area.
switch (ValVT.SimpleTy) {
default:
report_fatal_error("Unhandled value type for argument.");
case MVT::i64:
// i64 arguments should have been split to i32 for PPC32.
assert(IsPPC64 && "PPC32 should have split i64 values.");
[[fallthrough]];
case MVT::i1:
case MVT::i32: {
const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
// AIX integer arguments are always extended to register width.
if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
: CCValAssign::LocInfo::ZExt;
if (unsigned Reg = State.AllocateReg(GPRs))
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
else
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
return false;
}
case MVT::f32:
case MVT::f64: {
// The parameter save area (PSA) is reserved even if the float is passed in an FPR.
const unsigned StoreSize = LocVT.getStoreSize();
// Floats are always 4-byte aligned in the PSA on AIX.
// This includes f64 in 64-bit mode for ABI compatibility.
const unsigned Offset =
State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
unsigned FReg = State.AllocateReg(FPR);
if (FReg)
State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
// Reserve and initialize GPRs or initialize the PSA as required.
for (unsigned I = 0; I < StoreSize; I += PtrSize) {
if (unsigned Reg = State.AllocateReg(GPRs)) {
assert(FReg && "An FPR should be available when a GPR is reserved.");
if (State.isVarArg()) {
// Successfully reserved GPRs are only initialized for vararg calls.
// Custom handling is required for:
// f64 in PPC32 needs to be split into 2 GPRs.
// f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
}
} else {
// If there are insufficient GPRs, the PSA needs to be initialized.
// Initialization occurs even if an FPR was initialized for
// compatibility with the AIX XL compiler. The full memory for the
// argument will be initialized even if a prior word is saved in GPR.
// A custom memLoc is used when the argument also passes in FPR so
// that the callee handling can skip over it easily.
State.addLoc(
FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
LocInfo)
: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
break;
}
}
return false;
}
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
case MVT::v2i64:
case MVT::v2f64:
case MVT::v1i128: {
const unsigned VecSize = 16;
const Align VecAlign(VecSize);
if (!State.isVarArg()) {
// If there are vector registers remaining we don't consume any stack
// space.
if (unsigned VReg = State.AllocateReg(VR)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
return false;
}
// Vectors passed on the stack do not shadow GPRs or FPRs even though they
// might be allocated in the portion of the PSA that is shadowed by the
// GPRs.
const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
// Burn any underaligned registers and their shadowed stack space until
// we reach the required alignment.
while (NextRegIndex != GPRs.size() &&
!isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
// Shadow allocate register and its stack shadow.
unsigned Reg = State.AllocateReg(GPRs);
State.AllocateStack(PtrSize, PtrAlign);
assert(Reg && "Allocating register unexpectedly failed.");
(void)Reg;
NextRegIndex = State.getFirstUnallocated(GPRs);
}
// Vectors that are passed as fixed arguments are handled differently.
// They are passed in VRs if any are available (unlike arguments passed
// through ellipses) and shadow GPRs (unlike arguments to non-vaarg
// functions).
if (State.isFixed(ValNo)) {
if (unsigned VReg = State.AllocateReg(VR)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
// Shadow allocate GPRs and stack space even though we pass in a VR.
for (unsigned I = 0; I != VecSize; I += PtrSize)
State.AllocateReg(GPRs);
State.AllocateStack(VecSize, VecAlign);
return false;
}
// No vector registers remain so pass on the stack.
const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
// If all GPRs are consumed then we pass the argument fully on the stack.
if (NextRegIndex == GPRs.size()) {
const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
// Corner case for 32-bit codegen. We have 2 registers to pass the first
// half of the argument, and then need to pass the remaining half on the
// stack.
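// For illustration: a 16-byte vector vararg with R9 as the next free GPR
// produces one custom MemLoc covering the full 16-byte stack slot plus
// custom RegLocs for R9 and R10 carrying the first 8 bytes; the remaining
// 8 bytes travel only in memory.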
if (GPRs[NextRegIndex] == PPC::R9) {
const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
State.addLoc(
CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
const unsigned FirstReg = State.AllocateReg(PPC::R9);
const unsigned SecondReg = State.AllocateReg(PPC::R10);
assert(FirstReg && SecondReg &&
"Allocating R9 or R10 unexpectedly failed.");
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
return false;
}
// We have enough GPRs to fully pass the vector argument, and we have
// already consumed any underaligned registers. Start with the custom
// MemLoc and then the custom RegLocs.
const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
State.addLoc(
CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
for (unsigned I = 0; I != VecSize; I += PtrSize) {
const unsigned Reg = State.AllocateReg(GPRs);
assert(Reg && "Failed to allocated register for vararg vector argument");
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
}
return false;
}
}
return true;
}
// So far, this function is only used by LowerFormalArguments_AIX()
static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
bool IsPPC64,
bool HasP8Vector,
bool HasVSX) {
assert((IsPPC64 || SVT != MVT::i64) &&
"i64 should have been split for 32-bit codegen.");
switch (SVT) {
default:
report_fatal_error("Unexpected value type for formal argument");
case MVT::i1:
case MVT::i32:
case MVT::i64:
return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
case MVT::f32:
return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
case MVT::f64:
return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
case MVT::v2i64:
case MVT::v2f64:
case MVT::v1i128:
return &PPC::VRRCRegClass;
}
}
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
SelectionDAG &DAG, SDValue ArgValue,
MVT LocVT, const SDLoc &dl) {
assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
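// For example, an i8 value arriving in a 64-bit GPR gets an
// AssertSext/AssertZext below (when the argument carries an extension
// flag) so later combines know its upper bits, then a TRUNCATE back to i8.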
if (Flags.isSExt())
ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
DAG.getValueType(ValVT));
else if (Flags.isZExt())
ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
DAG.getValueType(ValVT));
return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
}
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
const unsigned LASize = FL->getLinkageSize();
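// For example, with a 48-byte 64-bit linkage area X5 maps to
// 48 + 8 * (X5 - X3) == 64, and with a 24-byte 32-bit linkage area R4
// maps to 24 + 4 * (R4 - R3) == 28.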
if (PPC::GPRCRegClass.contains(Reg)) {
assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
"Reg must be a valid argument register!");
return LASize + 4 * (Reg - PPC::R3);
}
if (PPC::G8RCRegClass.contains(Reg)) {
assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
"Reg must be a valid argument register!");
return LASize + 8 * (Reg - PPC::X3);
}
llvm_unreachable("Only general purpose registers expected.");
}
// AIX ABI Stack Frame Layout:
//
// Low Memory +--------------------------------------------+
// SP +---> | Back chain | ---+
// | +--------------------------------------------+ |
// | | Saved Condition Register | |
// | +--------------------------------------------+ |
// | | Saved Linkage Register | |
// | +--------------------------------------------+ | Linkage Area
// | | Reserved for compilers | |
// | +--------------------------------------------+ |
// | | Reserved for binders | |
// | +--------------------------------------------+ |
// | | Saved TOC pointer | ---+
// | +--------------------------------------------+
// | | Parameter save area |
// | +--------------------------------------------+
// | | Alloca space |
// | +--------------------------------------------+
// | | Local variable space |
// | +--------------------------------------------+
// | | Float/int conversion temporary |
// | +--------------------------------------------+
// | | Save area for AltiVec registers |
// | +--------------------------------------------+
// | | AltiVec alignment padding |
// | +--------------------------------------------+
// | | Save area for VRSAVE register |
// | +--------------------------------------------+
// | | Save area for General Purpose registers |
// | +--------------------------------------------+
// | | Save area for Floating Point registers |
// | +--------------------------------------------+
// +---- | Back chain |
// High Memory +--------------------------------------------+
//
// Specifications:
// AIX 7.2 Assembler Language Reference
// Subroutine linkage convention
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
CallConv == CallingConv::Fast) &&
"Unexpected calling convention!");
if (getTargetMachine().Options.GuaranteedTailCallOpt)
report_fatal_error("Tail call support is unimplemented on AIX.");
if (useSoftFloat())
report_fatal_error("Soft float support is unimplemented on AIX.");
const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
const bool IsPPC64 = Subtarget.isPPC64();
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
const EVT PtrVT = getPointerTy(MF.getDataLayout());
// Reserve space for the linkage area on the stack.
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
SmallVector<SDValue, 8> MemOps;
for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
CCValAssign &VA = ArgLocs[I++];
MVT LocVT = VA.getLocVT();
MVT ValVT = VA.getValVT();
ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
// For compatibility with the AIX XL compiler, the float args in the
// parameter save area are initialized even if the argument is available
// in a register. The caller is required to initialize both the register
// and memory; however, the callee can choose to expect it in either.
// The MemLoc is dismissed here because the argument is retrieved from
// the register.
if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
continue;
auto HandleMemLoc = [&]() {
const unsigned LocSize = LocVT.getStoreSize();
const unsigned ValSize = ValVT.getStoreSize();
assert((ValSize <= LocSize) &&
"Object size is larger than size of MemLoc");
int CurArgOffset = VA.getLocMemOffset();
// Objects are right-justified because AIX is big-endian.
if (LocSize > ValSize)
CurArgOffset += LocSize - ValSize;
// Potential tail calls could cause overwriting of argument stack slots.
const bool IsImmutable =
!(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue ArgValue =
DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
InVals.push_back(ArgValue);
};
// Vector arguments to vararg functions are passed both on the stack and
// in any available GPRs. Load the value from the stack and add the GPRs
// as live ins.
if (VA.isMemLoc() && VA.needsCustom()) {
assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
assert(isVarArg && "Only use custom memloc for vararg.");
// ValNo of the custom MemLoc, so we can compare it to the ValNo of the
// matching custom RegLocs.
const unsigned OriginalValNo = VA.getValNo();
(void)OriginalValNo;
auto HandleCustomVecRegLoc = [&]() {
assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
"Missing custom RegLoc.");
VA = ArgLocs[I++];
assert(VA.getValVT().isVector() &&
"Unexpected Val type for custom RegLoc.");
assert(VA.getValNo() == OriginalValNo &&
"ValNo mismatch between custom MemLoc and RegLoc.");
MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
MF.addLiveIn(VA.getLocReg(),
getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
Subtarget.hasVSX()));
};
HandleMemLoc();
// In 64-bit there will be exactly 2 custom RegLocs that follow; in
// 32-bit there will be 2 custom RegLocs if we are passing in R9 and
// R10.
HandleCustomVecRegLoc();
HandleCustomVecRegLoc();
// If we are targeting 32-bit, there might be 2 extra custom RegLocs if
// we passed the vector in R5, R6, R7 and R8.
if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
assert(!IsPPC64 &&
"Only 2 custom RegLocs expected for 64-bit codegen.");
HandleCustomVecRegLoc();
HandleCustomVecRegLoc();
}
continue;
}
if (VA.isRegLoc()) {
if (VA.getValVT().isScalarInteger())
FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
switch (VA.getValVT().SimpleTy) {
default:
report_fatal_error("Unhandled value type for argument.");
case MVT::f32:
FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
break;
case MVT::f64:
FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
break;
}
} else if (VA.getValVT().isVector()) {
switch (VA.getValVT().SimpleTy) {
default:
report_fatal_error("Unhandled value type for argument.");
case MVT::v16i8:
FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
break;
case MVT::v8i16:
FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v1i128:
FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
break;
case MVT::v4f32:
case MVT::v2f64:
FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
break;
}
}
}
if (Flags.isByVal() && VA.isMemLoc()) {
const unsigned Size =
alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
PtrByteSize);
const int FI = MF.getFrameInfo().CreateFixedObject(
Size, VA.getLocMemOffset(), /* IsImmutable */ false,
/* IsAliased */ true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
continue;
}
if (Flags.isByVal()) {
assert(VA.isRegLoc() && "MemLocs should already be handled.");
const MCPhysReg ArgReg = VA.getLocReg();
const PPCFrameLowering *FL = Subtarget.getFrameLowering();
const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
const int FI = MF.getFrameInfo().CreateFixedObject(
StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
/* IsAliased */ true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
// Add live ins for all the RegLocs for the same ByVal.
const TargetRegisterClass *RegClass =
IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
unsigned Offset) {
const Register VReg = MF.addLiveIn(PhysReg, RegClass);
// Since the caller's side has left-justified the aggregate in the
// register, we can simply store the entire register into the stack
// slot.
SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
// The store to the fixed-stack object is needed because accessing a
// field of the ByVal will use a GEP and load. Ideally we would optimize
// to extracting the value from the register directly, and elide the
// stores when the argument's address is not taken, but that will need to
// be future work.
SDValue Store = DAG.getStore(
CopyFrom.getValue(1), dl, CopyFrom,
DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
MachinePointerInfo::getFixedStack(MF, FI, Offset));
MemOps.push_back(Store);
};
unsigned Offset = 0;
HandleRegLoc(VA.getLocReg(), Offset);
Offset += PtrByteSize;
for (; Offset != StackSize && ArgLocs[I].isRegLoc();
Offset += PtrByteSize) {
assert(ArgLocs[I].getValNo() == VA.getValNo() &&
"RegLocs should be for ByVal argument.");
const CCValAssign RL = ArgLocs[I++];
HandleRegLoc(RL.getLocReg(), Offset);
FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
}
if (Offset != StackSize) {
assert(ArgLocs[I].getValNo() == VA.getValNo() &&
"Expected MemLoc for remaining bytes.");
assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
// Consume the MemLoc. The InVal has already been emitted, so nothing
// more needs to be done.
++I;
}
continue;
}
if (VA.isRegLoc() && !VA.needsCustom()) {
MVT::SimpleValueType SVT = ValVT.SimpleTy;
Register VReg =
MF.addLiveIn(VA.getLocReg(),
getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
Subtarget.hasVSX()));
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
if (ValVT.isScalarInteger() &&
(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
ArgValue =
truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
}
InVals.push_back(ArgValue);
continue;
}
if (VA.isMemLoc()) {
HandleMemLoc();
continue;
}
}
// On AIX a minimum of 8 words is saved to the parameter save area.
const unsigned MinParameterSaveArea = 8 * PtrByteSize;
// Area that is at least reserved in the caller of this function.
unsigned CallerReservedArea = std::max<unsigned>(
CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so
// that taking the difference between two stack areas will result in an
// aligned stack.
CallerReservedArea =
EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
FuncInfo->setMinReservedArea(CallerReservedArea);
if (isVarArg) {
FuncInfo->setVarArgsFrameIndex(
MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10};
static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10};
const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
// The fixed integer arguments of a variadic function are stored to the
// VarArgsFrameIndex on the stack so that they may be loaded by
// dereferencing the result of va_next.
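// For example (64-bit): if three fixed integer arguments consumed X3-X5,
// CCInfo.getStackSize() is 48 + 3 * 8 == 72, so GPRIndex starts at
// (72 - 48) / 8 == 3 and X6-X10 are spilled by the loop below.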
for (unsigned GPRIndex =
(CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
GPRIndex < NumGPArgRegs; ++GPRIndex) {
const Register VReg =
IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
: MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
MemOps.push_back(Store);
// Increment the address for the next argument to store.
SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
SDValue PPCTargetLowering::LowerCall_AIX(
SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
const CallBase *CB) const {
// See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
// AIX ABI stack frame layout.
assert((CFlags.CallConv == CallingConv::C ||
CFlags.CallConv == CallingConv::Cold ||
CFlags.CallConv == CallingConv::Fast) &&
"Unexpected calling convention!");
if (CFlags.IsPatchPoint)
report_fatal_error("This call type is unimplemented on AIX.");
const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<CCValAssign, 16> ArgLocs;
AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
*DAG.getContext());
// Reserve space for the linkage save area (LSA) on the stack.
// In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
// [SP][CR][LR][2 x reserved][TOC].
// The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
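// With 4-byte slots this places SP at offset 0, CR at 4, LR at 8, the two
// reserved words at 12 and 16, and the TOC pointer at 20; the offsets
// double in 64-bit mode.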
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
const bool IsPPC64 = Subtarget.isPPC64();
const EVT PtrVT = getPointerTy(DAG.getDataLayout());
const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
// The prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if the callee
// is variadic.
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
const unsigned NumBytes = std::max<unsigned>(
LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass.
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
// Set up a copy of the stack pointer for loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
: DAG.getRegister(PPC::R1, MVT::i32);
for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
const unsigned ValNo = ArgLocs[I].getValNo();
SDValue Arg = OutVals[ValNo];
ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
if (Flags.isByVal()) {
const unsigned ByValSize = Flags.getByValSize();
// Nothing to do for zero-sized ByVals on the caller side.
if (!ByValSize) {
++I;
continue;
}
auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
(LoadOffset != 0)
? DAG.getObjectPtrOffset(
dl, Arg, TypeSize::getFixed(LoadOffset))
: Arg,
MachinePointerInfo(), VT);
};
unsigned LoadOffset = 0;
// Initialize the registers that are fully occupied by the by-val argument.
while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
SDValue Load = GetLoad(PtrVT, LoadOffset);
MemOpChains.push_back(Load.getValue(1));
LoadOffset += PtrByteSize;
const CCValAssign &ByValVA = ArgLocs[I++];
assert(ByValVA.getValNo() == ValNo &&
"Unexpected location for pass-by-value argument.");
RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
}
if (LoadOffset == ByValSize)
continue;
// There must be one more loc to handle the remainder.
assert(ArgLocs[I].getValNo() == ValNo &&
"Expected additional location for by-value argument.");
if (ArgLocs[I].isMemLoc()) {
assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
const CCValAssign &ByValVA = ArgLocs[I++];
ISD::ArgFlagsTy MemcpyFlags = Flags;
// Only memcpy the bytes that are not passed in registers.
MemcpyFlags.setByValSize(ByValSize - LoadOffset);
Chain = CallSeqStart = createMemcpyOutsideCallSeq(
(LoadOffset != 0) ? DAG.getObjectPtrOffset(
dl, Arg, TypeSize::getFixed(LoadOffset))
: Arg,
DAG.getObjectPtrOffset(
dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
CallSeqStart, MemcpyFlags, DAG, dl);
continue;
}
// Initialize the final register residue.
// Any residue that occupies the final by-val arg register must be
// left-justified on AIX. Loads must be a power-of-2 size and cannot be
// larger than the ByValSize. For example: a 7-byte by-val arg requires
// 4-, 2- and 1-byte loads.
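// In 64-bit mode that 7-byte example produces shift amounts of 32, 16 and
// 8 bits for the 4-, 2- and 1-byte loads respectively, and the shifted
// values are OR'ed together below into one left-justified register image.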
const unsigned ResidueBytes = ByValSize % PtrByteSize;
assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
"Unexpected register residue for by-value argument.");
SDValue ResidueVal;
for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
const MVT VT =
N == 1 ? MVT::i8
: ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
SDValue Load = GetLoad(VT, LoadOffset);
MemOpChains.push_back(Load.getValue(1));
LoadOffset += N;
Bytes += N;
// By-val arguments are passed left-justified in registers.
// Every load here needs to be shifted, otherwise a full register load
// should have been used.
assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
"Unexpected load emitted during handling of pass-by-value "
"argument.");
unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
EVT ShiftAmountTy =
getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
SDValue ShiftedLoad =
DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
ShiftedLoad)
: ShiftedLoad;
}
const CCValAssign &ByValVA = ArgLocs[I++];
RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
continue;
}
CCValAssign &VA = ArgLocs[I++];
const MVT LocVT = VA.getLocVT();
const MVT ValVT = VA.getValVT();
switch (VA.getLocInfo()) {
default:
report_fatal_error("Unexpected argument extension type.");
case CCValAssign::Full:
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
}
if (VA.isRegLoc() && !VA.needsCustom()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
continue;
}
// Vector arguments passed to VarArg functions need custom handling when
// they are passed (at least partially) in GPRs.
if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
// Store value to its stack slot.
SDValue PtrOff =
DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
SDValue Store =
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
MemOpChains.push_back(Store);
const unsigned OriginalValNo = VA.getValNo();
// Then load the GPRs from the stack
unsigned LoadOffset = 0;
auto HandleCustomVecRegLoc = [&]() {
assert(I != E && "Unexpected end of CCvalAssigns.");
assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
"Expected custom RegLoc.");
CCValAssign RegVA = ArgLocs[I++];
assert(RegVA.getValNo() == OriginalValNo &&
"Custom MemLoc ValNo and custom RegLoc ValNo must match.");
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
DAG.getConstant(LoadOffset, dl, PtrVT));
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
LoadOffset += PtrByteSize;
};
// In 64-bit there will be exactly 2 custom RegLocs that follow; in
// 32-bit there will be 2 custom RegLocs if we are passing in R9 and
// R10.
HandleCustomVecRegLoc();
HandleCustomVecRegLoc();
if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
ArgLocs[I].getValNo() == OriginalValNo) {
assert(!IsPPC64 &&
"Only 2 custom RegLocs expected for 64-bit codegen.");
HandleCustomVecRegLoc();
HandleCustomVecRegLoc();
}
continue;
}
if (VA.isMemLoc()) {
SDValue PtrOff =
DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
continue;
}
if (!ValVT.isFloatingPoint())
report_fatal_error(
"Unexpected register handling for calling convention.");
// Custom handling is used for GPR initializations for vararg float
// arguments.
assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
LocVT.isInteger() &&
"Custom register handling only expected for VarArg.");
SDValue ArgAsInt =
DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
// f32 in 32-bit GPR
// f64 in 64-bit GPR
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
else if (Arg.getValueType().getFixedSizeInBits() <
LocVT.getFixedSizeInBits())
// f32 in 64-bit GPR.
RegsToPass.push_back(std::make_pair(
VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
else {
// f64 in two 32-bit GPRs
// The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
"Unexpected custom register for argument!");
CCValAssign &GPR1 = VA;
SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
DAG.getConstant(32, dl, MVT::i8));
RegsToPass.push_back(std::make_pair(
GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
if (I != E) {
// If only 1 GPR was available, there will only be one custom GPR and
// the argument will also pass in memory.
CCValAssign &PeekArg = ArgLocs[I];
if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
CCValAssign &GPR2 = ArgLocs[I++];
RegsToPass.push_back(std::make_pair(
GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
}
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// For indirect calls, we need to save the TOC base to the stack for
// restoration after the call.
if (CFlags.IsIndirect) {
assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
const unsigned TOCSaveOffset =
Subtarget.getFrameLowering()->getTOCSaveOffset();
setUsesTOCBasePtr(DAG);
SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(
Val.getValue(1), dl, Val, AddPtr,
MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
}
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InGlue;
for (auto Reg : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
InGlue = Chain.getValue(1);
}
const int SPDiff = 0;
return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(
Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
? RetCC_PPC_Cold
: RetCC_PPC);
}
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeReturn(Outs,
(Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
? RetCC_PPC_Cold
: RetCC_PPC);
SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[RealResIdx];
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
}
if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
bool isLittleEndian = Subtarget.isLittleEndian();
// Legalize ret f64 -> ret 2 x i32.
SDValue SVal =
DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
Glue = Chain.getValue(1);
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
} else
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
// Add the glue if we have it.
if (Glue.getNode())
RetOps.push_back(Glue);
return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
}
SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// Get the correct type for integers.
EVT IntVT = Op.getValueType();
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue FPSIdx = getFramePointerFrameIndex(DAG);
// Build a DYNAREAOFFSET node.
SDValue Ops[2] = {Chain, FPSIdx};
SDVTList VTs = DAG.getVTList(IntVT);
return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
SelectionDAG &DAG) const {
// When we pop the dynamic allocation we need to restore the SP link.
SDLoc dl(Op);
// Get the correct type for pointers.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Construct the stack pointer operand.
bool isPPC64 = Subtarget.isPPC64();
unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
SDValue StackPtr = DAG.getRegister(SP, PtrVT);
// Get the operands for the STACKRESTORE.
SDValue Chain = Op.getOperand(0);
SDValue SaveSP = Op.getOperand(1);
// Load the old link SP.
SDValue LoadLinkSP =
DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
// Restore the stack pointer.
Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
// Store the old link SP.
return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
}
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
EVT PtrVT = getPointerTy(MF.getDataLayout());
// Get the current return address save index. The users of this index are
// primarily return-address lowerings.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
int RASI = FI->getReturnAddrSaveIndex();
// If the return address save index hasn't been defined yet.
if (!RASI) {
// Find out the fixed offset of the return address save area.
int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
// Allocate the frame index for the return address save area.
RASI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, LROffset, false);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
}
return DAG.getFrameIndex(RASI, PtrVT);
}
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
EVT PtrVT = getPointerTy(MF.getDataLayout());
// Get current frame pointer save index. The users of this index will be
// primarily DYNALLOC instructions.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
int FPSI = FI->getFramePointerSaveIndex();
// If the frame pointer save index hasn't been defined yet.
if (!FPSI) {
// Find out the fixed offset of the frame pointer save area.
int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
// Allocate the frame index for frame pointer save area.
FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, FPOffset, true);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
return DAG.getFrameIndex(FPSI, PtrVT);
}
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
SDLoc dl(Op);
// Get the correct type for pointers.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Negate the size.
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
DAG.getConstant(0, dl, PtrVT), Size);
// Construct a node for the frame pointer save index.
SDValue FPSIdx = getFramePointerFrameIndex(DAG);
SDValue Ops[3] = { Chain, NegSize, FPSIdx };
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
if (hasInlineStackProbe(MF))
return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
return DAG.getFrameIndex(FI, PtrVT);
}
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
DAG.getVTList(MVT::i32, MVT::Other),
Op.getOperand(0), Op.getOperand(1));
}
SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
Op.getOperand(0), Op.getOperand(1));
}
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVectorLoad(Op, DAG);
assert(Op.getValueType() == MVT::i1 &&
"Custom lowering only for i1 loads");
// First, load 8 bits into 32 bits, then truncate to 1 bit.
SDLoc dl(Op);
LoadSDNode *LD = cast<LoadSDNode>(Op);
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
MachineMemOperand *MMO = LD->getMemOperand();
SDValue NewLD =
DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
BasePtr, MVT::i8, MMO);
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
return DAG.getMergeValues(Ops, dl);
}
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (Op.getOperand(1).getValueType().isVector())
return LowerVectorStore(Op, DAG);
assert(Op.getOperand(1).getValueType() == MVT::i1 &&
"Custom lowering only for i1 stores");
// First, zero extend to 32 bits, then use a truncating store to 8 bits.
SDLoc dl(Op);
StoreSDNode *ST = cast<StoreSDNode>(Op);
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
SDValue Value = ST->getValue();
MachineMemOperand *MMO = ST->getMemOperand();
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
Value);
return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}
// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::i1 &&
"Custom lowering only for i1 results");
SDLoc DL(Op);
return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
}
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
SelectionDAG &DAG) const {
// Implements a vector truncate that fits in a vector register as a shuffle.
// We want to legalize vector truncates down to where the source fits in
// a vector register (and target is therefore smaller than vector register
// size). At that point legalization will try to custom lower the sub-legal
// result and get here - where we can contain the truncate as a single target
// operation.
// For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
// <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
//
// We will implement it for big-endian ordering as this (where x denotes
// undefined):
// < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
// < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
//
// The same operation in little-endian ordering will be:
// <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
// <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
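// Using the <2 x i16> -> <2 x i8> example above: with i8 elements the wide
// type is v16i8, SizeMult is 2, and the mask built below comes out as
// <1, 3, 17, 17, ...> for big-endian (picking each element's low byte) or
// <0, 2, 17, 17, ...> for little-endian; here the indices >= 16 select
// lanes of the second shuffle operand, which is undef.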
EVT TrgVT = Op.getValueType();
assert(TrgVT.isVector() && "Vector type expected.");
unsigned TrgNumElts = TrgVT.getVectorNumElements();
EVT EltVT = TrgVT.getVectorElementType();
if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
!llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
return SDValue();
SDValue N1 = Op.getOperand(0);
EVT SrcVT = N1.getValueType();
unsigned SrcSize = SrcVT.getSizeInBits();
if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
!llvm::has_single_bit<uint32_t>(
SrcVT.getVectorElementType().getSizeInBits()))
return SDValue();
if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
return SDValue();
unsigned WideNumElts = 128 / EltVT.getSizeInBits();
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
SDLoc DL(Op);
SDValue Op1, Op2;
if (SrcSize == 256) {
EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
EVT SplitVT =
N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
unsigned SplitNumElts = SplitVT.getVectorNumElements();
Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
DAG.getConstant(0, DL, VecIdxTy));
Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
DAG.getConstant(SplitNumElts, DL, VecIdxTy));
}
else {
Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
Op2 = DAG.getUNDEF(WideVT);
}
// First list the elements we want to keep.
unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
SmallVector<int, 16> ShuffV;
if (Subtarget.isLittleEndian())
for (unsigned i = 0; i < TrgNumElts; ++i)
ShuffV.push_back(i * SizeMult);
else
for (unsigned i = 1; i <= TrgNumElts; ++i)
ShuffV.push_back(i * SizeMult - 1);
// Populate the remaining elements with undefs.
for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
ShuffV.push_back(WideNumElts + 1);
Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
}
/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
/// instruction when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
EVT CmpVT = Op.getOperand(0).getValueType();
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
SDLoc dl(Op);
// Without power9-vector, we don't have a native instruction for f128
// comparison. The following transformation is needed so the comparison
// becomes a setcc (which is then lowered to a libcall):
// select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
SDValue Z = DAG.getSetCC(
dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
LHS, RHS, CC);
SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
}
// Not FP, or using SPE? Not a fsel.
if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
Subtarget.hasSPE())
return Op;
SDNodeFlags Flags = Op.getNode()->getFlags();
// We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
// presence of infinities.
if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
switch (CC) {
default:
break;
case ISD::SETOGT:
case ISD::SETGT:
return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
}
}
// We might be able to do better than this under some circumstances, but in
// general, fsel-based lowering of select is a finite-math-only optimization.
// For more information, see section F.3 of the 2.06 ISA specification.
// With ISA 3.0, f128 is a legal type, but fsel does not operate on
// quad-precision values, so f128 results are excluded as well.
if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
(!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
ResVT == MVT::f128)
return Op;
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
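// For example, select_cc(setge, x, 0.0, tv, fv) maps directly to
// fsel(x, tv, fv), which yields tv when x >= 0.0 and fv otherwise;
// setlt just swaps tv/fv first.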
SDValue Sel1;
if (isFloatingPointZero(RHS))
switch (CC) {
default: break; // SETUO etc aren't handled by fsel.
case ISD::SETNE:
std::swap(TV, FV);
[[fallthrough]];
case ISD::SETEQ:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
return DAG.getNode(PPCISD::FSEL, dl, ResVT,
DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
[[fallthrough]];
case ISD::SETOGE:
case ISD::SETGE:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
[[fallthrough]];
case ISD::SETOLE:
case ISD::SETLE:
if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
return DAG.getNode(PPCISD::FSEL, dl, ResVT,
DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
}
SDValue Cmp;
switch (CC) {
default: break; // SETUO etc aren't handled by fsel.
case ISD::SETNE:
std::swap(TV, FV);
[[fallthrough]];
case ISD::SETEQ:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
return DAG.getNode(PPCISD::FSEL, dl, ResVT,
DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOGE:
case ISD::SETGE:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOLE:
case ISD::SETLE:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
}
return Op;
}
static unsigned getPPCStrictOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("No strict version of this opcode!");
case PPCISD::FCTIDZ:
return PPCISD::STRICT_FCTIDZ;
case PPCISD::FCTIWZ:
return PPCISD::STRICT_FCTIWZ;
case PPCISD::FCTIDUZ:
return PPCISD::STRICT_FCTIDUZ;
case PPCISD::FCTIWUZ:
return PPCISD::STRICT_FCTIWUZ;
case PPCISD::FCFID:
return PPCISD::STRICT_FCFID;
case PPCISD::FCFIDU:
return PPCISD::STRICT_FCFIDU;
case PPCISD::FCFIDS:
return PPCISD::STRICT_FCFIDS;
case PPCISD::FCFIDUS:
return PPCISD::STRICT_FCFIDUS;
}
}
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) {
SDLoc dl(Op);
bool IsStrict = Op->isStrictFPOpcode();
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
// TODO: Any other flags to propagate?
SDNodeFlags Flags;
Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
// For strict nodes, source is the second operand.
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
MVT DestTy = Op.getSimpleValueType();
assert(Src.getValueType().isFloatingPoint() &&
(DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
DestTy == MVT::i64) &&
"Invalid FP_TO_INT types");
if (Src.getValueType() == MVT::f32) {
if (IsStrict) {
Src =
DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
Chain = Src.getValue(1);
} else
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
}
if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
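// e.g. an f32 -> i16 conversion on a Power9 64-bit target is widened here
// to an i64 conversion and handled by the FCTIDZ family below.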
unsigned Opc = ISD::DELETED_NODE;
switch (DestTy.SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Opc = IsSigned ? PPCISD::FCTIWZ
: (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
break;
case MVT::i64:
assert((IsSigned || Subtarget.hasFPCVT()) &&
"i64 FP_TO_UINT is supported only with FPCVT");
Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
}
EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
SDValue Conv;
if (IsStrict) {
Opc = getPPCStrictOpcode(Opc);
Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
Flags);
} else {
Conv = DAG.getNode(Opc, dl, ConvTy, Src);
}
return Conv;
}
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
SelectionDAG &DAG,
const SDLoc &dl) const {
SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
bool IsStrict = Op->isStrictFPOpcode();
// Convert the FP value to an int value through memory.
bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
(IsSigned || Subtarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
MachinePointerInfo MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Emit a store to the stack slot.
SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
if (i32Stack) {
MachineFunction &MF = DAG.getMachineFunction();
Alignment = Align(4);
MachineMemOperand *MMO =
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
SDValue Ops[] = { Chain, Tmp, FIPtr };
Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
} else
Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias on big endian.
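// (The conversion result is stored as an 8-byte value; on big-endian the
// 32-bit payload ends up in bytes 4-7 of the slot, hence the bias.)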
if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, dl, FIPtr.getValueType()));
MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
}
RLI.Chain = Chain;
RLI.Ptr = FIPtr;
RLI.MPI = MPI;
RLI.Alignment = Alignment;
}
/// Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
SelectionDAG &DAG,
const SDLoc &dl) const {
SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
if (Op->isStrictFPOpcode())
return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
else
return Mov;
}
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const {
bool IsStrict = Op->isStrictFPOpcode();
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
// FP to INT conversions are legal for f128.
if (SrcVT == MVT::f128)
return Subtarget.hasP9Vector() ? Op : SDValue();
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
// PPC (the libcall is not available).
if (SrcVT == MVT::ppcf128) {
if (DstVT == MVT::i32) {
// TODO: Conservatively pass only nofpexcept flag here. Need to check and
// set other fast-math flags to FP operations in both strict and
// non-strict cases. (FP_TO_SINT, FSUB)
SDNodeFlags Flags;
Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
if (IsSigned) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
// Add the two halves of the long double in round-to-zero mode, and use
// a smaller FP_TO_SINT.
if (IsStrict) {
SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
DAG.getVTList(MVT::f64, MVT::Other),
{Op.getOperand(0), Lo, Hi}, Flags);
return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
DAG.getVTList(MVT::i32, MVT::Other),
{Res.getValue(1), Res}, Flags);
} else {
SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
}
} else {
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
if (IsStrict) {
// Sel = Src < 0x80000000
// FltOfs = select Sel, 0.0, 0x80000000
// IntOfs = select Sel, 0, 0x80000000
// Result = fp_to_sint(Src - FltOfs) ^ IntOfs
SDValue Chain = Op.getOperand(0);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
EVT DstSetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
Chain, true);
Chain = Sel.getValue(1);
SDValue FltOfs = DAG.getSelect(
dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
DAG.getVTList(SrcVT, MVT::Other),
{Chain, Src, FltOfs}, Flags);
Chain = Val.getValue(1);
SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
DAG.getVTList(DstVT, MVT::Other),
{Chain, Val}, Flags);
Chain = SInt.getValue(1);
SDValue IntOfs = DAG.getSelect(
dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
return DAG.getMergeValues({Result, Chain}, dl);
} else {
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
}
}
}
return SDValue();
}
if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
return LowerFP_TO_INTDirectMove(Op, DAG, dl);
ReuseLoadInfo RLI;
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
ReuseLoadInfo &RLI,
SelectionDAG &DAG,
ISD::LoadExtType ET) const {
// Conservatively skip reusing for constrained FP nodes.
if (Op->isStrictFPOpcode())
return false;
SDLoc dl(Op);
bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
(Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
if (ET == ISD::NON_EXTLOAD &&
(ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
isOperationLegalOrCustom(Op.getOpcode(),
Op.getOperand(0).getValueType())) {
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
return true;
}
LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
LD->isNonTemporal())
return false;
if (LD->getMemoryVT() != MemVT)
return false;
// If the result of the load is an illegal type, then we can't build a
// valid chain for reuse since the legalised loads and the token factor
// node that ties them together use a different output chain than the
// illegal load.
if (!isTypeLegal(LD->getValueType(0)))
return false;
RLI.Ptr = LD->getBasePtr();
if (LD->isIndexed() && !LD->getOffset().isUndef()) {
assert(LD->getAddressingMode() == ISD::PRE_INC &&
"Non-pre-inc AM on PPC?");
RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
LD->getOffset());
}
RLI.Chain = LD->getChain();
RLI.MPI = LD->getPointerInfo();
RLI.IsDereferenceable = LD->isDereferenceable();
RLI.IsInvariant = LD->isInvariant();
RLI.Alignment = LD->getAlign();
RLI.AAInfo = LD->getAAInfo();
RLI.Ranges = LD->getRanges();
RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
return true;
}
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
SDValue NewResChain,
SelectionDAG &DAG) const {
if (!ResChain)
return;
SDLoc dl(NewResChain);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
NewResChain, DAG.getUNDEF(MVT::Other));
assert(TF.getNode() != NewResChain.getNode() &&
"A new TF really is required here");
DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
/// Analyze the profitability of a direct move:
/// prefer a float load over an int load plus direct move
/// when the loaded integer value has no integer uses.
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
if (Origin->getOpcode() != ISD::LOAD)
return true;
// On subtargets without LXSIBZX/LXSIHZX (e.g. Power8),
// prefer a direct move if the memory access is 1 or 2 bytes.
MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
if (!Subtarget.hasP9Vector() &&
(!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
return true;
for (SDNode::use_iterator UI = Origin->use_begin(),
UE = Origin->use_end();
UI != UE; ++UI) {
// Only look at the users of the loaded value.
if (UI.getUse().get().getResNo() != 0)
continue;
if (UI->getOpcode() != ISD::SINT_TO_FP &&
UI->getOpcode() != ISD::UINT_TO_FP &&
UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
return true;
}
return false;
}
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
const PPCSubtarget &Subtarget,
SDValue Chain = SDValue()) {
bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
SDLoc dl(Op);
// TODO: Any other flags to propagate?
SDNodeFlags Flags;
Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
: (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
if (Op->isStrictFPOpcode()) {
if (!Chain)
Chain = Op.getOperand(0);
return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
} else
return DAG.getNode(ConvOpc, dl, ConvTy, Src);
}
/// Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
SelectionDAG &DAG,
const SDLoc &dl) const {
assert((Op.getValueType() == MVT::f32 ||
Op.getValueType() == MVT::f64) &&
"Invalid floating point type as target of conversion");
assert(Subtarget.hasFPCVT() &&
"Int to FP conversions with direct moves require FPCVT");
SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
return convertIntToFP(Op, Mov, DAG, Subtarget);
}
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
EVT VecVT = Vec.getValueType();
assert(VecVT.isVector() && "Expected a vector type.");
assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
EVT EltVT = VecVT.getVectorElementType();
unsigned WideNumElts = 128 / EltVT.getSizeInBits();
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
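// e.g. widening a v4i8 input to v16i8 gives NumConcat == 4: the original
// vector followed by three undef vectors.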
SmallVector<SDValue, 16> Ops(NumConcat);
Ops[0] = Vec;
SDValue UndefVec = DAG.getUNDEF(VecVT);
for (unsigned i = 1; i < NumConcat; ++i)
Ops[i] = UndefVec;
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
}
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const {
bool IsStrict = Op->isStrictFPOpcode();
unsigned Opc = Op.getOpcode();
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
"Unexpected conversion type");
assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
"Supports conversions to v2f64/v4f32 only.");
// TODO: Any other flags to propagate?
SDNodeFlags Flags;
Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
bool FourEltRes = Op.getValueType() == MVT::v4f32;
SDValue Wide = widenVec(DAG, Src, dl);
EVT WideVT = Wide.getValueType();
unsigned WideNumElts = WideVT.getVectorNumElements();
MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
SmallVector<int, 16> ShuffV;
for (unsigned i = 0; i < WideNumElts; ++i)
ShuffV.push_back(i + WideNumElts);
int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
int SaveElts = FourEltRes ? 4 : 2;
if (Subtarget.isLittleEndian())
for (int i = 0; i < SaveElts; i++)
ShuffV[i * Stride] = i;
else
for (int i = 1; i <= SaveElts; i++)
ShuffV[i * Stride - 1] = i - 1;
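// Worked example (v4i8 -> v4f32, so Wide is v16i8 and Stride == 4): the
// mask starts as <16,17,...,31>, i.e. all lanes taken from ShuffleSrc2
// (zeros for unsigned, undef for signed). On LE we then set lanes
// 0,4,8,12 to 0,1,2,3; on BE, lanes 3,7,11,15. Either way each input byte
// lands in the low-order byte of its i32 word, giving a free zero- or
// any-extend.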
SDValue ShuffleSrc2 =
SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
SDValue Extend;
if (SignedConv) {
Arrange = DAG.getBitcast(IntermediateVT, Arrange);
EVT ExtVT = Src.getValueType();
if (Subtarget.hasP9Altivec())
ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
IntermediateVT.getVectorNumElements());
Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
DAG.getValueType(ExtVT));
} else
Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
if (IsStrict)
return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
{Op.getOperand(0), Extend}, Flags);
return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
bool IsStrict = Op->isStrictFPOpcode();
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
// TODO: Any other flags to propagate?
SDNodeFlags Flags;
Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
EVT InVT = Src.getValueType();
EVT OutVT = Op.getValueType();
if (OutVT.isVector() && OutVT.isFloatingPoint() &&
isOperationCustom(Op.getOpcode(), InVT))
return LowerINT_TO_FPVector(Op, DAG, dl);
// Conversions to f128 are legal.
if (Op.getValueType() == MVT::f128)
return Subtarget.hasP9Vector() ? Op : SDValue();
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
if (Src.getValueType() == MVT::i1) {
SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
DAG.getConstantFP(1.0, dl, Op.getValueType()),
DAG.getConstantFP(0.0, dl, Op.getValueType()));
if (IsStrict)
return DAG.getMergeValues({Sel, Chain}, dl);
else
return Sel;
}
// If we have direct moves, we can do the entire conversion and skip the
// store/load; however, without FPCVT we can't do most conversions.
if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
Subtarget.isPPC64() && Subtarget.hasFPCVT())
return LowerINT_TO_FPDirectMove(Op, DAG, dl);
assert((IsSigned || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
if (Src.getValueType() == MVT::i64) {
SDValue SINT = Src;
// When converting to single-precision, we actually need to convert
// to double-precision first and then round to single-precision.
// To avoid double-rounding effects during that operation, we have
// to prepare the input operand. Bits that might be truncated when
// converting to double-precision are replaced by a bit that won't
// be lost at this stage, but is below the single-precision rounding
// position.
//
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
!Subtarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
// is the case, we are guaranteed the value will fit into the 53 bit
// mantissa of an IEEE double-precision value without rounding.)
// If any of those low 11 bits were not zero originally, make sure
// bit 12 (value 2048) is set instead, so that the final rounding
// to single-precision gets the correct result.
SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
SINT, DAG.getConstant(2047, dl, MVT::i64));
Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
Round, DAG.getConstant(2047, dl, MVT::i64));
Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
Round = DAG.getNode(ISD::AND, dl, MVT::i64,
Round, DAG.getConstant(-2048, dl, MVT::i64));
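// For example, if SINT ends in ...001, then (SINT & 2047) + 2047 == 2048
// carries into the value-2048 bit; OR-ing that back in and clearing the
// low 11 bits leaves a sticky bit that survives the conversion to double.
// If the low 11 bits were already zero, the add produces 2047, which the
// final AND clears again, leaving SINT unchanged.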
// However, we cannot use that value unconditionally: if the magnitude
// of the input value is small, the bit-twiddling we did above might
// end up visibly changing the output. Fortunately, in that case, we
// don't need to twiddle bits since the original input will convert
// exactly to double-precision floating-point already. Therefore,
// construct a conditional to use the original value if the top 11
// bits are all sign-bit copies, and use the rounded value computed
// above otherwise.
SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
SINT, DAG.getConstant(53, dl, MVT::i32));
Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
Cond, DAG.getConstant(1, dl, MVT::i64));
Cond = DAG.getSetCC(
dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
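// An i64 whose top 11 bits are all sign copies yields 0 or -1 after the
// arithmetic shift by 53; adding 1 maps exactly those two values into the
// unsigned range [0,1], so the SETUGT-1 test fires precisely when the
// value does not fit in 53 bits and the rounded input must be used.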
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
ReuseLoadInfo RLI;
SDValue Bits;
MachineFunction &MF = DAG.getMachineFunction();
if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
} else if (Subtarget.hasLFIWAX() &&
canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
MachineMemOperand *MMO =
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
SDValue Ops[] = { RLI.Chain, RLI.Ptr };
Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
} else if (Subtarget.hasFPCVT() &&
canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
MachineMemOperand *MMO =
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
SDValue Ops[] = { RLI.Chain, RLI.Ptr };
Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
} else if (((Subtarget.hasLFIWAX() &&
SINT.getOpcode() == ISD::SIGN_EXTEND) ||
(Subtarget.hasFPCVT() &&
SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
SINT.getOperand(0).getValueType() == MVT::i32) {
MachineFrameInfo &MFI = MF.getFrameInfo();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FrameIdx));
Chain = Store;
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
RLI.Chain = Chain;
RLI.MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
RLI.Alignment = Align(4);
MachineMemOperand *MMO =
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
SDValue Ops[] = { RLI.Chain, RLI.Ptr };
Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
Chain = Bits.getValue(1);
} else
Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
if (IsStrict)
Chain = FP.getValue(1);
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
if (IsStrict)
FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
DAG.getVTList(MVT::f32, MVT::Other),
{Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
else
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
}
return FP;
}
assert(Src.getValueType() == MVT::i32 &&
"Unhandled INT_TO_FP type in custom expander!");
// Since we only generate this in 64-bit mode, we can take advantage of
// 64-bit registers. In particular, sign extend the input value into the
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
EVT PtrVT = getPointerTy(MF.getDataLayout());
SDValue Ld;
if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
ReuseLoadInfo RLI;
bool ReusingLoad;
if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FrameIdx));
Chain = Store;
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
RLI.Chain = Chain;
RLI.MPI =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
RLI.Alignment = Align(4);
}
MachineMemOperand *MMO =
MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
RLI.Alignment, RLI.AAInfo, RLI.Ranges);
SDValue Ops[] = { RLI.Chain, RLI.Ptr };
Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
DAG.getVTList(MVT::f64, MVT::Other), Ops,
MVT::i32, MMO);
Chain = Ld.getValue(1);
if (ReusingLoad)
spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
} else {
assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
// STD the extended value into the stack slot.
SDValue Store = DAG.getStore(
Chain, dl, Ext64, FIdx,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
Chain = Store;
// Load the value as a double.
Ld = DAG.getLoad(
MVT::f64, dl, Chain, FIdx,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
Chain = Ld.getValue(1);
}
// FCFID it and return it.
SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
if (IsStrict)
Chain = FP.getValue(1);
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
if (IsStrict)
FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
DAG.getVTList(MVT::f32, MVT::Other),
{Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
else
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
}
return FP;
}
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
/*
The rounding mode is in bits 30:31 of the FPSCR, and has the following
settings:
00 Round to nearest
01 Round to 0
10 Round to +inf
11 Round to -inf
GET_ROUNDING, on the other hand, expects the following:
-1 Undefined
0 Round to 0
1 Round to nearest
2 Round to +inf
3 Round to -inf
To perform the conversion, we do:
((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
*/
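// Spelled out for the four FPSCR.RN encodings, as a sanity check of the
// expression above:
//   RN == 00: (0 ^ (3 >> 1)) == 1 (round to nearest)
//   RN == 01: (1 ^ (2 >> 1)) == 0 (round to 0)
//   RN == 10: (2 ^ (1 >> 1)) == 2 (round to +inf)
//   RN == 11: (3 ^ (0 >> 1)) == 3 (round to -inf)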
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = getPointerTy(MF.getDataLayout());
// Save FP Control Word to register
SDValue Chain = Op.getOperand(0);
SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
Chain = MFFS.getValue(1);
SDValue CWD;
if (isTypeLegal(MVT::i64)) {
CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
} else {
// Save FP register to stack slot
int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
// Load FP Control Word from low 32 bits of stack slot.
assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
"Stack slot adjustment is valid only on big endian subtargets!");
SDValue Four = DAG.getConstant(4, dl, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
Chain = CWD.getValue(1);
}
// Transform as necessary
SDValue CWD1 =
DAG.getNode(ISD::AND, dl, MVT::i32,
CWD, DAG.getConstant(3, dl, MVT::i32));
SDValue CWD2 =
DAG.getNode(ISD::SRL, dl, MVT::i32,
DAG.getNode(ISD::AND, dl, MVT::i32,
DAG.getNode(ISD::XOR, dl, MVT::i32,
CWD, DAG.getConstant(3, dl, MVT::i32)),
DAG.getConstant(3, dl, MVT::i32)),
DAG.getConstant(1, dl, MVT::i32));
SDValue RetVal =
DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
RetVal =
DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
dl, VT, RetVal);
return DAG.getMergeValues({RetVal, Chain}, dl);
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
SDLoc dl(Op);
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
"Unexpected SHL!");
// Expand into a bunch of logical ops. Note that these ops
// depend on the PPC behavior for oversized shift amounts.
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Amt = Op.getOperand(2);
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
DAG.getConstant(BitWidth, dl, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
DAG.getConstant(-BitWidth, dl, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
SDValue OutOps[] = { OutLo, OutHi };
return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned BitWidth = VT.getSizeInBits();
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
"Unexpected SRL!");
// Expand into a bunch of logical ops. Note that these ops
// depend on the PPC behavior for oversized shift amounts.
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Amt = Op.getOperand(2);
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
DAG.getConstant(BitWidth, dl, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
DAG.getConstant(-BitWidth, dl, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
SDValue OutOps[] = { OutLo, OutHi };
return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
"Unexpected SRA!");
// Expand into a bunch of logical ops, followed by a select_cc.
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Amt = Op.getOperand(2);
EVT AmtVT = Amt.getValueType();
SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
DAG.getConstant(BitWidth, dl, AmtVT), Amt);
SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
DAG.getConstant(-BitWidth, dl, AmtVT));
SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
Tmp4, Tmp6, ISD::SETLE);
SDValue OutOps[] = { OutLo, OutHi };
return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
bool IsFSHL = Op.getOpcode() == ISD::FSHL;
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
SDValue Z = Op.getOperand(2);
EVT AmtVT = Z.getValueType();
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
// This is simpler than TargetLowering::expandFunnelShift because we can rely
// on PowerPC shift by BW being well defined.
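// For example, fshl with BitWidth == 32 and Z == 40 reduces to
// (X << 8) | (Y >> 24); and when Z % BW == 0, SubZ == BW, the PPC shift
// by BW produces 0, so the result is just X (or Y for fshr), as required.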
Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
DAG.getConstant(BitWidth - 1, dl, AmtVT));
SDValue SubZ =
DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
return DAG.getNode(ISD::OR, dl, VT, X, Y);
}
//===----------------------------------------------------------------------===//
// Vector related lowering.
//
/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
/// element size of SplatSize. Cast the result to VT.
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, const SDLoc &dl) {
static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
// For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
SplatSize = 1;
Val = 0xFF;
}
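// e.g. a 2-byte splat of 0xFFFF is canonicalized to a v16i8 splat of 0xFF,
// so all-ones vectors of every element size share a single form.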
EVT CanonicalVT = VTys[SplatSize-1];
// Build a canonical splat for this value.
return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
const SDLoc &dl, EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
DAG.getConstant(IID, dl, MVT::i32), Op);
}
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const SDLoc &dl,
EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = LHS.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}
/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op0.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount. The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
SelectionDAG &DAG, const SDLoc &dl) {
// Force LHS/RHS to be the right type.
LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
int Ops[16];
for (unsigned i = 0; i != 16; ++i)
Ops[i] = i + Amt;
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
bool HasDirectMove,
bool HasP8Vector) {
EVT VecVT = V->getValueType(0);
bool RightType = VecVT == MVT::v2f64 ||
(HasP8Vector && VecVT == MVT::v4f32) ||
(HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
if (!RightType)
return false;
bool IsSplat = true;
bool IsLoad = false;
SDValue Op0 = V->getOperand(0);
// This function is called in a block that confirms the node is not a constant
// splat. So a constant BUILD_VECTOR here means the vector is built out of
// different constants.
if (V->isConstant())
return false;
for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
if (V->getOperand(i).isUndef())
return false;
// We want to expand nodes that represent load-and-splat even if the
// loaded value is a floating point truncation or conversion to int.
if (V->getOperand(i).getOpcode() == ISD::LOAD ||
(V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
(V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
(V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
IsLoad = true;
// If the operands are different or the input is not a load and has more
// uses than just this BV node, then it isn't a splat.
if (V->getOperand(i) != Op0 ||
(!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
IsSplat = false;
}
return !(IsSplat && IsLoad);
}
// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Op0 = Op->getOperand(0);
+ if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
+ (Op.getValueType() != MVT::f128))
+ return SDValue();
+
SDValue Lo = Op0.getOperand(0);
SDValue Hi = Op0.getOperand(1);
-
- if ((Op.getValueType() != MVT::f128) ||
- (Op0.getOpcode() != ISD::BUILD_PAIR) || (Lo.getValueType() != MVT::i64) ||
- (Hi.getValueType() != MVT::i64) || !Subtarget.isPPC64())
+ if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
return SDValue();
if (!Subtarget.isLittleEndian())
std::swap(Lo, Hi);
return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
}
static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
const SDValue *InputLoad = &Op;
while (InputLoad->getOpcode() == ISD::BITCAST)
InputLoad = &InputLoad->getOperand(0);
if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
InputLoad = &InputLoad->getOperand(0);
}
if (InputLoad->getOpcode() != ISD::LOAD)
return nullptr;
LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
}
// Convert the argument APFloat to a single precision APFloat if there is no
// loss in information during the conversion to single precision APFloat and the
// resulting number is not a denormal number. Return true if successful.
bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
APFloat APFloatToConvert = ArgAPFloat;
bool LosesInfo = true;
APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
&LosesInfo);
bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
if (Success)
ArgAPFloat = APFloatToConvert;
return Success;
}
// Bitcast the argument APInt to a double and convert it to a single precision
// APFloat, bitcast the APFloat to an APInt and assign it to the original
// argument if there is no loss in information during the conversion from
// double to single precision APFloat and the resulting number is not a denormal
// number. Return true if successful.
bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
double DpValue = ArgAPInt.bitsToDouble();
APFloat APFloatDp(DpValue);
bool Success = convertToNonDenormSingle(APFloatDp);
if (Success)
ArgAPInt = APFloatDp.bitcastToAPInt();
return Success;
}
// Nondestructive check for convertToNonDenormSingle.
bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
// Only convert if it loses info, since XXSPLTIDP should
// handle the other case.
APFloat APFloatToConvert = ArgAPFloat;
bool LosesInfo = true;
APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
&LosesInfo);
return (!LosesInfo && !APFloatToConvert.isDenormal());
}
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
unsigned &Opcode) {
LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
return false;
EVT Ty = Op->getValueType(0);
// For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
// as we cannot handle extending loads for these types.
if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
ISD::isNON_EXTLoad(InputNode))
return true;
EVT MemVT = InputNode->getMemoryVT();
// For v8i16 and v16i8 types, extending loads can be handled as long as the
// memory VT is the same vector element VT type.
// The loads feeding into the v8i16 and v16i8 types will be extending because
// scalar i8/i16 are not legal types.
if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
(MemVT == Ty.getVectorElementType()))
return true;
if (Ty == MVT::v2i64) {
// Check the extend type when the memory type is i32 and the output
// vector type is v2i64.
if (MemVT == MVT::i32) {
if (ISD::isZEXTLoad(InputNode))
Opcode = PPCISD::ZEXT_LD_SPLAT;
if (ISD::isSEXTLoad(InputNode))
Opcode = PPCISD::SEXT_LD_SPLAT;
}
return true;
}
return false;
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
bool BVNIsConstantSplat =
BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian());
// If it is a splat of a double, check if we can shrink it to a 32 bit
// non-denormal float which when converted back to double gives us the same
// double. This is to exploit the XXSPLTIDP instruction.
// If we lose precision, we use XXSPLTI32DX.
if (BVNIsConstantSplat && (SplatBitSize == 64) &&
Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
// Check the type first to short-circuit so we don't modify APSplatBits if
// this block isn't executed.
if ((Op->getValueType(0) == MVT::v2f64) &&
convertToNonDenormSingle(APSplatBits)) {
SDValue SplatNode = DAG.getNode(
PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
return DAG.getBitcast(Op.getValueType(), SplatNode);
} else {
// We may lose precision, so we have to use XXSPLTI32DX.
uint32_t Hi =
(uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
uint32_t Lo =
(uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
if (!Hi || !Lo)
// If either half is 0, generate XXLXOR to zero the register first.
SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
if (Hi)
SplatNode = DAG.getNode(
PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
DAG.getTargetConstant(0, dl, MVT::i32),
DAG.getTargetConstant(Hi, dl, MVT::i32));
if (Lo)
SplatNode =
DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
DAG.getTargetConstant(1, dl, MVT::i32),
DAG.getTargetConstant(Lo, dl, MVT::i32));
return DAG.getBitcast(Op.getValueType(), SplatNode);
}
}
if (!BVNIsConstantSplat || SplatBitSize > 32) {
unsigned NewOpcode = PPCISD::LD_SPLAT;
// Handle load-and-splat patterns as we have instructions that will do this
// in one go.
if (DAG.isSplatValue(Op, true) &&
isValidSplatLoad(Subtarget, Op, NewOpcode)) {
const SDValue *InputLoad = &Op.getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
// If the input load is an extending load, it will be an i32 -> i64
// extending load and isValidSplatLoad() will update NewOpcode.
unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
unsigned ElementSize =
MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
assert(((ElementSize == 2 * MemorySize)
? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
NewOpcode == PPCISD::SEXT_LD_SPLAT)
: (NewOpcode == PPCISD::LD_SPLAT)) &&
"Unmatched element size and opcode!\n");
// Checking for a single use of this load, we have to check for vector
// width (128 bits) / ElementSize uses (since each operand of the
// BUILD_VECTOR is a separate use of the value).
unsigned NumUsesOfInputLD = 128 / ElementSize;
for (SDValue BVInOp : Op->ops())
if (BVInOp.isUndef())
NumUsesOfInputLD--;
// Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
// the cases below should also apply to "lfiwzx/lfiwax + LE target + index
// 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
// 15", but isValidSplatLoad() currently only returns true when the data
// at index 0 is valid, so we will not get into trouble for those cases.
//
// case 1 - lfiwzx/lfiwax
// 1.1: load result is i32 and is sign/zero extend to i64;
// 1.2: build a v2i64 vector type with above loaded value;
// 1.3: the vector has only one value at index 0, others are all undef;
// 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
if (NumUsesOfInputLD == 1 &&
(Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
!Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
Subtarget.hasLFIWAX()))
return SDValue();
// case 2 - lxvr[hb]x
// 2.1: load result is at most i16;
// 2.2: build a vector with above loaded value;
// 2.3: the vector has only one value at index 0, others are all undef;
// 2.4: on LE target, so that lxvr[hb]x does not need any permute.
if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
Subtarget.isISA3_1() && ElementSize <= 16)
return SDValue();
assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
Subtarget.hasVSX()) {
SDValue Ops[] = {
LD->getChain(), // Chain
LD->getBasePtr(), // Ptr
DAG.getValueType(Op.getValueType()) // VT
};
SDValue LdSplt = DAG.getMemIntrinsicNode(
NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
LD->getMemoryVT(), LD->getMemOperand());
// Replace all uses of the output chain of the original load with the
// output chain of the new load.
DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
LdSplt.getValue(1));
return LdSplt;
}
}
// In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
// 32 bits can be lowered to VSX instructions under certain conditions.
// Without VSX, there is no pattern more efficient than expanding the node.
if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
Subtarget.hasP8Vector()))
return Op;
return SDValue();
}
uint64_t SplatBits = APSplatBits.getZExtValue();
uint64_t SplatUndef = APSplatUndef.getZExtValue();
unsigned SplatSize = SplatBitSize / 8;
// First, handle single instruction cases.
// All zeros?
if (SplatBits == 0) {
// Canonicalize all zero vectors to be v4i32.
if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
}
return Op;
}
// We have XXSPLTIW for constant splats four bytes wide.
// Given vector length is a multiple of 4, 2-byte splats can be replaced
// with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
// make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
// turned into a 4-byte splat of 0xABABABAB.
if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
Op.getValueType(), DAG, dl);
if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
dl);
// We have XXSPLTIB for constant splats one byte wide.
if (Subtarget.hasP9Vector() && SplatSize == 1)
return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
dl);
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
(32-SplatBitSize));
if (SextVal >= -16 && SextVal <= 15)
return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
dl);
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
// VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
// If this value is in the range [17,31] and is odd, use:
// VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
// If this value is in the range [-31,-17] and is odd, use:
// VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
// Note the last two are three-instruction sequences.
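// For example, a splat of 30 becomes VSPLTI[bhw](15) + VSPLTI[bhw](15),
// and a splat of 17 becomes VSPLTI[bhw](1) - VSPLTI[bhw](-16).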
if (SextVal >= -32 && SextVal <= 31) {
// To avoid having these optimizations undone by constant folding,
// we convert to a pseudo that will be expanded later into one of
// the above forms.
SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
EVT VT = (SplatSize == 1 ? MVT::v16i8 :
(SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
if (VT == Op.getValueType())
return RetVal;
else
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
// for fneg/fabs.
if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
// Make -1 and vspltisw -1:
SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
// Make the VSLW intrinsic, computing 0x8000_0000.
SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
OnesV, DAG, dl);
// xor by OnesV to invert it.
Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
-8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
};
for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
// Indirect through the SplatCsts array so that we favor 'vsplti -1' for
// cases which are ambiguous (e.g. formation of 0x8000_0000).
int i = SplatCsts[idx];
// Figure out what shift amount will be used by altivec if shifted by i in
// this splat size.
unsigned TypeShiftAmt = i & (SplatBitSize-1);
// vsplti + shl self.
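// For example, a byte splat of 64 is matched here with i == 4, since
// 4 << (4 & 7) == 64: emit vspltisb(4), then shift the splat left by
// itself.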
if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
Intrinsic::ppc_altivec_vslw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// vsplti + srl self.
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
Intrinsic::ppc_altivec_vsrw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// vsplti + rol self.
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
Intrinsic::ppc_altivec_vrlw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// t = vsplti c, result = vsldoi t, t, 1
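// e.g. a v8i16 splat of 0x0300: vspltish(3) gives bytes <0,3,0,3,...>,
// and rotating the register by one byte yields <3,0,3,0,...>, which is
// 0x0300 per halfword.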
if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
}
return SDValue();
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
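// Each 13-bit ID encodes a four-element shuffle mask as a base-9 number,
// one digit per element (hence the *9 arithmetic below); <0,1,2,3> encodes
// the LHS identity and <4,5,6,7> the RHS identity.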
enum {
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
OP_VMRGHW,
OP_VMRGLW,
OP_VSPLTISW0,
OP_VSPLTISW1,
OP_VSPLTISW2,
OP_VSPLTISW3,
OP_VSLDOI4,
OP_VSLDOI8,
OP_VSLDOI12
};
if (OpNum == OP_COPY) {
if (LHSID == (1*9+2)*9+3) return LHS;
assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
return RHS;
}
SDValue OpLHS, OpRHS;
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
int ShufIdxs[16];
switch (OpNum) {
default: llvm_unreachable("Unknown i32 permute!");
case OP_VMRGHW:
ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
break;
case OP_VMRGLW:
ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
break;
case OP_VSPLTISW0:
for (unsigned i = 0; i != 16; ++i)
ShufIdxs[i] = (i&3)+0;
break;
case OP_VSPLTISW1:
for (unsigned i = 0; i != 16; ++i)
ShufIdxs[i] = (i&3)+4;
break;
case OP_VSPLTISW2:
for (unsigned i = 0; i != 16; ++i)
ShufIdxs[i] = (i&3)+8;
break;
case OP_VSPLTISW3:
for (unsigned i = 0; i != 16; ++i)
ShufIdxs[i] = (i&3)+12;
break;
case OP_VSLDOI4:
return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
case OP_VSLDOI8:
return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
case OP_VSLDOI12:
return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
}
EVT VT = OpLHS.getValueType();
OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
SelectionDAG &DAG) const {
const unsigned BytesInVector = 16;
bool IsLE = Subtarget.isLittleEndian();
SDLoc dl(N);
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
unsigned ShiftElts = 0, InsertAtByte = 0;
bool Swap = false;
// Shifts required to get the byte we want at element 7.
unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
0, 15, 14, 13, 12, 11, 10, 9};
unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
1, 2, 3, 4, 5, 6, 7, 8};
ArrayRef<int> Mask = N->getMask();
int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
// For each mask element, find out if we're just inserting something
// from V2 into V1 or vice versa.
// Possible permutations inserting an element from V2 into V1:
// X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
// 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
// ...
// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
// Inserting from V1 into V2 will be similar, except mask range will be
// [16,31].
bool FoundCandidate = false;
// If both vector operands for the shuffle are the same vector, the mask
// will contain only elements from the first one and the second one will be
// undef.
unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
// Go through the mask of bytes to find an element that's being moved
// from one vector to the other.
for (unsigned i = 0; i < BytesInVector; ++i) {
unsigned CurrentElement = Mask[i];
// If the 2nd operand is undefined, we should only look for the expected
// source element (7 for BE, 8 for LE) in the Mask.
if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
continue;
bool OtherElementsInOrder = true;
// Examine the other elements in the Mask to see if they're in original
// order.
for (unsigned j = 0; j < BytesInVector; ++j) {
if (j == i)
continue;
// If CurrentElement is from V1 [0,15], then we expect the rest of the
// Mask to be from V2 [16,31] and vice versa, unless the 2nd operand is
// undefined, in which case we assume we're always picking from the 1st
// operand.
int MaskOffset =
(!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
if (Mask[j] != OriginalOrder[j] + MaskOffset) {
OtherElementsInOrder = false;
break;
}
}
// If other elements are in original order, we record the number of shifts
// we need to get the element we want into element 7. Also record which byte
// in the vector we should insert into.
if (OtherElementsInOrder) {
// If 2nd operand is undefined, we assume no shifts and no swapping.
if (V2.isUndef()) {
ShiftElts = 0;
Swap = false;
} else {
// Only need the last 4 bits for shifts because operands will be swapped
// if CurrentElement is >= 2^4.
ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
: BigEndianShifts[CurrentElement & 0xF];
Swap = CurrentElement < BytesInVector;
}
InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
FoundCandidate = true;
break;
}
}
if (!FoundCandidate)
return SDValue();
// Candidate found, construct the proper SDAG sequence with VINSERTB,
// optionally with VECSHL if shift is required.
if (Swap)
std::swap(V1, V2);
if (V2.isUndef())
V2 = V1;
if (ShiftElts) {
SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
DAG.getConstant(ShiftElts, dl, MVT::i32));
return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
SelectionDAG &DAG) const {
const unsigned NumHalfWords = 8;
const unsigned BytesInVector = NumHalfWords * 2;
// Check that the shuffle is on half-words.
if (!isNByteElemShuffleMask(N, 2, 1))
return SDValue();
bool IsLE = Subtarget.isLittleEndian();
SDLoc dl(N);
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
unsigned ShiftElts = 0, InsertAtByte = 0;
bool Swap = false;
// Shifts required to get the half-word we want at element 3.
unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
uint32_t Mask = 0;
uint32_t OriginalOrderLow = 0x1234567;
uint32_t OriginalOrderHigh = 0x89ABCDEF;
// Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
// 32-bit space, only need 4-bit nibbles per element.
for (unsigned i = 0; i < NumHalfWords; ++i) {
unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
}
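// e.g. an identity shuffle taken entirely from V1 packs to 0x01234567
// (== OriginalOrderLow), and one taken entirely from V2 packs to
// 0x89ABCDEF (== OriginalOrderHigh).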
// For each mask element, find out if we're just inserting something
// from V2 into V1 or vice versa. Possible permutations inserting an element
// from V2 into V1:
// X, 1, 2, 3, 4, 5, 6, 7
// 0, X, 2, 3, 4, 5, 6, 7
// 0, 1, X, 3, 4, 5, 6, 7
// 0, 1, 2, X, 4, 5, 6, 7
// 0, 1, 2, 3, X, 5, 6, 7
// 0, 1, 2, 3, 4, X, 6, 7
// 0, 1, 2, 3, 4, 5, X, 7
// 0, 1, 2, 3, 4, 5, 6, X
// Inserting from V1 into V2 will be similar, except mask range will be [8,15].
bool FoundCandidate = false;
// Go through the mask of half-words to find an element that's being moved
// from one vector to the other.
for (unsigned i = 0; i < NumHalfWords; ++i) {
unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
uint32_t MaskOtherElts = ~(0xF << MaskShift);
uint32_t TargetOrder = 0x0;
// If both vector operands for the shuffle are the same vector, the mask
// will contain only elements from the first one and the second one will be
// undef.
if (V2.isUndef()) {
ShiftElts = 0;
unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
TargetOrder = OriginalOrderLow;
Swap = false;
// Skip if not the correct element or mask of other elements don't equal
// to our expected order.
if (MaskOneElt == VINSERTHSrcElem &&
(Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
FoundCandidate = true;
break;
}
} else { // If both operands are defined.
// Target order is [8,15] if the current mask is between [0,7].
TargetOrder =
(MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
// Skip if mask of other elements don't equal our expected order.
if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
// We only need the last 3 bits for the number of shifts.
ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
: BigEndianShifts[MaskOneElt & 0x7];
InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
Swap = MaskOneElt < NumHalfWords;
FoundCandidate = true;
break;
}
}
}
if (!FoundCandidate)
return SDValue();
// Candidate found, construct the proper SDAG sequence with VINSERTH,
// optionally with VECSHL if shift is required.
if (Swap)
std::swap(V1, V2);
if (V2.isUndef())
V2 = V1;
SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
if (ShiftElts) {
// Double ShiftElts because we're left shifting on v16i8 type.
SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
DAG.getConstant(InsertAtByte, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
DAG.getConstant(InsertAtByte, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
/// return the default SDValue.
SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG) const {
// The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
// to v16i8. Peek through the bitcasts to get the actual operands.
SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
auto ShuffleMask = SVN->getMask();
SDValue VecShuffle(SVN, 0);
SDLoc DL(SVN);
// Check that we have a four byte shuffle.
if (!isNByteElemShuffleMask(SVN, 4, 1))
return SDValue();
// Canonicalize so that the RHS is a BUILD_VECTOR when lowering to xxsplti32dx.
if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
std::swap(LHS, RHS);
VecShuffle = peekThroughBitcasts(DAG.getCommutedVectorShuffle(*SVN));
ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
if (!CommutedSV)
return SDValue();
ShuffleMask = CommutedSV->getMask();
}
// Ensure that the RHS is a vector of constants.
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
if (!BVN)
return SDValue();
// Check if RHS is a splat of 4-bytes (or smaller).
APInt APSplatValue, APSplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
SplatBitSize > 32)
return SDValue();
// Check that the shuffle mask matches the semantics of XXSPLTI32DX.
// The instruction splats a constant C into two words of the source vector
// producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
// Thus we check that the shuffle mask is the equivalent of
// <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
// Note: the check above of isNByteElemShuffleMask() ensures that the bytes
// within each word are consecutive, so we only need to check the first byte.
SDValue Index;
bool IsLE = Subtarget.isLittleEndian();
if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
(ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
(ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
else
return SDValue();
// If the splat is narrower than 32-bits, we need to get the 32-bit value
// for XXSPLTI32DX.
unsigned SplatVal = APSplatValue.getZExtValue();
for (; SplatBitSize < 32; SplatBitSize <<= 1)
SplatVal |= (SplatVal << SplatBitSize);
SDValue SplatNode = DAG.getNode(
PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
}
/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
/// a multiple of 8. Otherwise convert it to a scalar rotation (i128),
/// i.e. (or (shl x, C1), (srl x, 128-C1)).
SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
assert(Op.getValueType() == MVT::v1i128 &&
"Only set v1i128 as custom, other type shouldn't reach here!");
SDLoc dl(Op);
SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
unsigned SHLAmt = N1.getConstantOperandVal(0);
if (SHLAmt % 8 == 0) {
std::array<int, 16> Mask;
std::iota(Mask.begin(), Mask.end(), 0);
std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
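// For example, SHLAmt == 8 produces the byte mask <1, 2, ..., 15, 0>.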
if (SDValue Shuffle =
DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
DAG.getUNDEF(MVT::v16i8), Mask))
return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
}
SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
DAG.getConstant(SHLAmt, dl, MVT::i32));
SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
}
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
// Any nodes that were combined in the target-independent combiner prior
// to vector legalization will not be sent to the target combine. Try to
// combine it here.
if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
if (!isa<ShuffleVectorSDNode>(NewShuffle))
return NewShuffle;
Op = NewShuffle;
SVOp = cast<ShuffleVectorSDNode>(Op);
V1 = Op.getOperand(0);
V2 = Op.getOperand(1);
}
EVT VT = Op.getValueType();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned ShiftElts, InsertAtByte;
bool Swap = false;
// If this is a load-and-splat, we can do that with a single instruction
// in some cases. However if the load has multiple uses, we don't want to
// combine it because that will just produce multiple loads.
bool IsPermutedLoad = false;
const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
(PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
InputLoad->hasOneUse()) {
bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
int SplatIdx =
PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
// The splat index for permuted loads will be in the left half of the vector
// which is strictly wider than the loaded value by 8 bytes. So we need to
// adjust the splat index to point to the correct address in memory.
if (IsPermutedLoad) {
assert((isLittleEndian || IsFourByte) &&
"Unexpected size for permuted load on big endian target");
SplatIdx += IsFourByte ? 2 : 1;
assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
"Splat of a value outside of the loaded memory");
}
LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
// For 4-byte load-and-splat, we need Power9.
if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
uint64_t Offset = 0;
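// SplatIdx follows PPC mnemonic (big-endian) element numbering, so on
// little endian targets it must be mirrored to compute the memory offset
// of the splatted element.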
if (IsFourByte)
Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
else
Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
// If the width of the load is the same as the width of the splat,
// loading with an offset would load the wrong memory.
if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
Offset = 0;
SDValue BasePtr = LD->getBasePtr();
if (Offset != 0)
BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
BasePtr, DAG.getIntPtrConstant(Offset, dl));
SDValue Ops[] = {
LD->getChain(), // Chain
BasePtr, // BasePtr
DAG.getValueType(Op.getValueType()) // VT
};
SDVTList VTL =
DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
SDValue LdSplt =
DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
Ops, LD->getMemoryVT(), LD->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
if (LdSplt.getValueType() != SVOp->getValueType(0))
LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
return LdSplt;
}
}
// All v2i64 and v2f64 shuffles are legal
if (VT == MVT::v2i64 || VT == MVT::v2f64)
return Op;
if (Subtarget.hasP9Vector() &&
PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
isLittleEndian)) {
if (V2.isUndef())
V2 = V1;
else if (Swap)
std::swap(V1, V2);
SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
if (ShiftElts) {
SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
DAG.getConstant(ShiftElts, dl, MVT::i32));
SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
DAG.getConstant(InsertAtByte, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
DAG.getConstant(InsertAtByte, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
SDValue SplatInsertNode;
if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
return SplatInsertNode;
}
if (Subtarget.hasP9Altivec()) {
SDValue NewISDNode;
if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
return NewISDNode;
if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
return NewISDNode;
}
if (Subtarget.hasVSX() &&
PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
if (Swap)
std::swap(V1, V2);
SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Conv2 =
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
DAG.getConstant(ShiftElts, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
}
if (Subtarget.hasVSX() &&
PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
if (Swap)
std::swap(V1, V2);
SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
SDValue Conv2 =
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
DAG.getConstant(ShiftElts, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
}
if (Subtarget.hasP9Vector()) {
if (PPC::isXXBRHShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
} else if (PPC::isXXBRWShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
} else if (PPC::isXXBRDShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
} else if (PPC::isXXBRQShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
}
}
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
DAG.getConstant(SplatIdx, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
}
// Left shifts of 8 bytes are actually swaps. Convert accordingly.
if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
}
}
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
if (V2.isUndef()) {
if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(SVOp, 4) ||
PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
(Subtarget.hasP8Altivec() && (
PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
return Op;
}
}
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
(Subtarget.hasP8Altivec() && (
PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
// perfect shuffle table to emit an optimal matching sequence.
ArrayRef<int> PermMask = SVOp->getMask();
if (!DisablePerfectShuffle && !isLittleEndian) {
unsigned PFIndexes[4];
bool isFourElementShuffle = true;
for (unsigned i = 0; i != 4 && isFourElementShuffle;
++i) { // Element number
unsigned EltNo = 8; // Start out undef.
for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
if (PermMask[i * 4 + j] < 0)
continue; // Undef, ignore it.
unsigned ByteSource = PermMask[i * 4 + j];
if ((ByteSource & 3) != j) {
isFourElementShuffle = false;
break;
}
if (EltNo == 8) {
EltNo = ByteSource / 4;
} else if (EltNo != ByteSource / 4) {
isFourElementShuffle = false;
break;
}
}
PFIndexes[i] = EltNo;
}
// If this shuffle can be expressed as a shuffle of 4-byte elements, use the
// perfect shuffle table to determine if it is cost effective to do this as
// discrete instructions, or whether we should use a vperm.
// For now, we skip this for little endian until such time as we have a
// little-endian perfect shuffle table.
if (isFourElementShuffle) {
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
PFIndexes[2] * 9 + PFIndexes[3];
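// Each PFIndexes[i] is in the range [0, 8] (8 meaning undef), so the four
// indices combine into a single base-9 number indexing the table.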
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
// Determining when to avoid vperm is tricky. Many things affect the cost
// of vperm, particularly how many times the perm mask needs to be
// computed. For example, if the perm mask can be hoisted out of a loop or
// is already used (perhaps because there are multiple permutes with the
// same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
// permute mask out of the loop requires an extra register.
//
// As a compromise, we only emit discrete instructions if the shuffle can
// be generated in 3 or fewer operations. When we have loop information
// available, if this block is within a loop, we should avoid using vperm
// for 3-operation perms and use a constant pool load instead.
if (Cost < 3)
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
}
// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
// vector that will get spilled to the constant pool.
if (V2.isUndef()) V2 = V1;
return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
}
SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
ArrayRef<int> PermMask, EVT VT,
SDValue V1, SDValue V2) const {
unsigned Opcode = PPCISD::VPERM;
EVT ValType = V1.getValueType();
SDLoc dl(Op);
bool NeedSwap = false;
bool isLittleEndian = Subtarget.isLittleEndian();
bool isPPC64 = Subtarget.isPPC64();
if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
(V1->hasOneUse() || V2->hasOneUse())) {
LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
"XXPERM instead\n");
Opcode = PPCISD::XXPERM;
// The second input to XXPERM is also an output, so if the second input has
// multiple uses then a copy is necessary. As a result we want the
// single-use operand to be used as the second input to prevent copying.
if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
(isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
std::swap(V1, V2);
NeedSwap = !NeedSwap;
}
}
// The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
// For little endian, the order of the input vectors is reversed, and
// the permutation mask is complemented with respect to 31. This is
// necessary to produce proper semantics with the big-endian-based vperm
// instruction.
EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
/*
Vectors will be appended like so: [ V1 | v2 ]
XXSWAPD on V1:
[ A | B | C | D ] -> [ C | D | A | B ]
0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
i.e. index of A, B += 8, and index of C, D -= 8.
XXSWAPD on V2:
[ E | F | G | H ] -> [ G | H | E | F ]
16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
i.e. index of E, F += 8, index of G, H -= 8
Swap V1 and V2:
[ V1 | V2 ] -> [ V2 | V1 ]
0-15 16-31 0-15 16-31
i.e. index of V1 += 16, index of V2 -= 16
*/
SmallVector<SDValue, 16> ResultMask;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
if (V1HasXXSWAPD) {
if (SrcElt < 8)
SrcElt += 8;
else if (SrcElt < 16)
SrcElt -= 8;
}
if (V2HasXXSWAPD) {
if (SrcElt > 23)
SrcElt -= 8;
else if (SrcElt > 15)
SrcElt += 8;
}
if (NeedSwap) {
if (SrcElt < 16)
SrcElt += 16;
else
SrcElt -= 16;
}
for (unsigned j = 0; j != BytesPerElement; ++j)
if (isLittleEndian)
ResultMask.push_back(
DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
else
ResultMask.push_back(
DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
}
if (V1HasXXSWAPD) {
dl = SDLoc(V1->getOperand(0));
V1 = V1->getOperand(0)->getOperand(1);
}
if (V2HasXXSWAPD) {
dl = SDLoc(V2->getOperand(0));
V2 = V2->getOperand(0)->getOperand(1);
}
if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
if (ValType != MVT::v2f64)
V1 = DAG.getBitcast(MVT::v2f64, V1);
if (V2.getValueType() != MVT::v2f64)
V2 = DAG.getBitcast(MVT::v2f64, V2);
}
ShufflesHandledWithVPERM++;
SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
LLVM_DEBUG({
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
if (Opcode == PPCISD::XXPERM) {
dbgs() << "Emitting a XXPERM for the following shuffle:\n";
} else {
dbgs() << "Emitting a VPERM for the following shuffle:\n";
}
SVOp->dump();
dbgs() << "With the following permute control vector:\n";
VPermMask.dump();
});
if (Opcode == PPCISD::XXPERM)
VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
// For little endian we only need to swap the input operands; the permute
// mask was already calculated accordingly.
if (isLittleEndian)
std::swap(V1, V2);
SDValue VPERMNode =
DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
VPERMNode = DAG.getBitcast(ValType, VPERMNode);
return VPERMNode;
}
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
CompareOpc = -1;
isDot = false;
switch (IntrinsicID) {
default:
return false;
// Comparison predicates.
case Intrinsic::ppc_altivec_vcmpbfp_p:
CompareOpc = 966;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpeqfp_p:
CompareOpc = 198;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpequb_p:
CompareOpc = 6;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpequh_p:
CompareOpc = 70;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpequw_p:
CompareOpc = 134;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpequd_p:
if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
CompareOpc = 199;
isDot = true;
} else
return false;
break;
case Intrinsic::ppc_altivec_vcmpneb_p:
case Intrinsic::ppc_altivec_vcmpneh_p:
case Intrinsic::ppc_altivec_vcmpnew_p:
case Intrinsic::ppc_altivec_vcmpnezb_p:
case Intrinsic::ppc_altivec_vcmpnezh_p:
case Intrinsic::ppc_altivec_vcmpnezw_p:
if (Subtarget.hasP9Altivec()) {
switch (IntrinsicID) {
default:
llvm_unreachable("Unknown comparison intrinsic.");
case Intrinsic::ppc_altivec_vcmpneb_p:
CompareOpc = 7;
break;
case Intrinsic::ppc_altivec_vcmpneh_p:
CompareOpc = 71;
break;
case Intrinsic::ppc_altivec_vcmpnew_p:
CompareOpc = 135;
break;
case Intrinsic::ppc_altivec_vcmpnezb_p:
CompareOpc = 263;
break;
case Intrinsic::ppc_altivec_vcmpnezh_p:
CompareOpc = 327;
break;
case Intrinsic::ppc_altivec_vcmpnezw_p:
CompareOpc = 391;
break;
}
isDot = true;
} else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgefp_p:
CompareOpc = 454;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpgtfp_p:
CompareOpc = 710;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpgtsb_p:
CompareOpc = 774;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpgtsh_p:
CompareOpc = 838;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpgtsw_p:
CompareOpc = 902;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpgtsd_p:
if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
CompareOpc = 967;
isDot = true;
} else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgtub_p:
CompareOpc = 518;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpgtuh_p:
CompareOpc = 582;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpgtuw_p:
CompareOpc = 646;
isDot = true;
break;
case Intrinsic::ppc_altivec_vcmpgtud_p:
if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
CompareOpc = 711;
isDot = true;
} else
return false;
break;
case Intrinsic::ppc_altivec_vcmpequq:
case Intrinsic::ppc_altivec_vcmpgtsq:
case Intrinsic::ppc_altivec_vcmpgtuq:
if (!Subtarget.isISA3_1())
return false;
switch (IntrinsicID) {
default:
llvm_unreachable("Unknown comparison intrinsic.");
case Intrinsic::ppc_altivec_vcmpequq:
CompareOpc = 455;
break;
case Intrinsic::ppc_altivec_vcmpgtsq:
CompareOpc = 903;
break;
case Intrinsic::ppc_altivec_vcmpgtuq:
CompareOpc = 647;
break;
}
break;
// VSX predicate comparisons use the same infrastructure
case Intrinsic::ppc_vsx_xvcmpeqdp_p:
case Intrinsic::ppc_vsx_xvcmpgedp_p:
case Intrinsic::ppc_vsx_xvcmpgtdp_p:
case Intrinsic::ppc_vsx_xvcmpeqsp_p:
case Intrinsic::ppc_vsx_xvcmpgesp_p:
case Intrinsic::ppc_vsx_xvcmpgtsp_p:
if (Subtarget.hasVSX()) {
switch (IntrinsicID) {
case Intrinsic::ppc_vsx_xvcmpeqdp_p:
CompareOpc = 99;
break;
case Intrinsic::ppc_vsx_xvcmpgedp_p:
CompareOpc = 115;
break;
case Intrinsic::ppc_vsx_xvcmpgtdp_p:
CompareOpc = 107;
break;
case Intrinsic::ppc_vsx_xvcmpeqsp_p:
CompareOpc = 67;
break;
case Intrinsic::ppc_vsx_xvcmpgesp_p:
CompareOpc = 83;
break;
case Intrinsic::ppc_vsx_xvcmpgtsp_p:
CompareOpc = 75;
break;
}
isDot = true;
} else
return false;
break;
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp:
CompareOpc = 966;
break;
case Intrinsic::ppc_altivec_vcmpeqfp:
CompareOpc = 198;
break;
case Intrinsic::ppc_altivec_vcmpequb:
CompareOpc = 6;
break;
case Intrinsic::ppc_altivec_vcmpequh:
CompareOpc = 70;
break;
case Intrinsic::ppc_altivec_vcmpequw:
CompareOpc = 134;
break;
case Intrinsic::ppc_altivec_vcmpequd:
if (Subtarget.hasP8Altivec())
CompareOpc = 199;
else
return false;
break;
case Intrinsic::ppc_altivec_vcmpneb:
case Intrinsic::ppc_altivec_vcmpneh:
case Intrinsic::ppc_altivec_vcmpnew:
case Intrinsic::ppc_altivec_vcmpnezb:
case Intrinsic::ppc_altivec_vcmpnezh:
case Intrinsic::ppc_altivec_vcmpnezw:
if (Subtarget.hasP9Altivec())
switch (IntrinsicID) {
default:
llvm_unreachable("Unknown comparison intrinsic.");
case Intrinsic::ppc_altivec_vcmpneb:
CompareOpc = 7;
break;
case Intrinsic::ppc_altivec_vcmpneh:
CompareOpc = 71;
break;
case Intrinsic::ppc_altivec_vcmpnew:
CompareOpc = 135;
break;
case Intrinsic::ppc_altivec_vcmpnezb:
CompareOpc = 263;
break;
case Intrinsic::ppc_altivec_vcmpnezh:
CompareOpc = 327;
break;
case Intrinsic::ppc_altivec_vcmpnezw:
CompareOpc = 391;
break;
}
else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgefp:
CompareOpc = 454;
break;
case Intrinsic::ppc_altivec_vcmpgtfp:
CompareOpc = 710;
break;
case Intrinsic::ppc_altivec_vcmpgtsb:
CompareOpc = 774;
break;
case Intrinsic::ppc_altivec_vcmpgtsh:
CompareOpc = 838;
break;
case Intrinsic::ppc_altivec_vcmpgtsw:
CompareOpc = 902;
break;
case Intrinsic::ppc_altivec_vcmpgtsd:
if (Subtarget.hasP8Altivec())
CompareOpc = 967;
else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgtub:
CompareOpc = 518;
break;
case Intrinsic::ppc_altivec_vcmpgtuh:
CompareOpc = 582;
break;
case Intrinsic::ppc_altivec_vcmpgtuw:
CompareOpc = 646;
break;
case Intrinsic::ppc_altivec_vcmpgtud:
if (Subtarget.hasP8Altivec())
CompareOpc = 711;
else
return false;
break;
case Intrinsic::ppc_altivec_vcmpequq_p:
case Intrinsic::ppc_altivec_vcmpgtsq_p:
case Intrinsic::ppc_altivec_vcmpgtuq_p:
if (!Subtarget.isISA3_1())
return false;
switch (IntrinsicID) {
default:
llvm_unreachable("Unknown comparison intrinsic.");
case Intrinsic::ppc_altivec_vcmpequq_p:
CompareOpc = 455;
break;
case Intrinsic::ppc_altivec_vcmpgtsq_p:
CompareOpc = 903;
break;
case Intrinsic::ppc_altivec_vcmpgtuq_p:
CompareOpc = 647;
break;
}
isDot = true;
break;
}
return true;
}
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrinsicID = Op.getConstantOperandVal(0);
SDLoc dl(Op);
switch (IntrinsicID) {
case Intrinsic::thread_pointer:
// Reads the thread pointer register, used for __builtin_thread_pointer.
if (Subtarget.isPPC64())
return DAG.getRegister(PPC::X13, MVT::i64);
return DAG.getRegister(PPC::R2, MVT::i32);
case Intrinsic::ppc_rldimi: {
assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
SDValue Src = Op.getOperand(1);
APInt Mask = Op.getConstantOperandAPInt(4);
if (Mask.isZero())
return Op.getOperand(2);
if (Mask.isAllOnes())
return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
uint64_t SH = Op.getConstantOperandVal(3);
unsigned MB = 0, ME = 0;
if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
report_fatal_error("invalid rldimi mask!");
// rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
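// Pre-rotate Src so that the combined rotation (this pre-rotation plus the
// machine instruction's 63 - ME) is congruent to SH modulo 64.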
if (ME < 63 - SH) {
Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
DAG.getConstant(ME + SH + 1, dl, MVT::i32));
} else if (ME > 63 - SH) {
Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
DAG.getConstant(ME + SH - 63, dl, MVT::i32));
}
return SDValue(
DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
{Op.getOperand(2), Src,
DAG.getTargetConstant(63 - ME, dl, MVT::i32),
DAG.getTargetConstant(MB, dl, MVT::i32)}),
0);
}
case Intrinsic::ppc_rlwimi: {
APInt Mask = Op.getConstantOperandAPInt(4);
if (Mask.isZero())
return Op.getOperand(2);
if (Mask.isAllOnes())
return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
Op.getOperand(3));
unsigned MB = 0, ME = 0;
if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
report_fatal_error("invalid rlwimi mask!");
return SDValue(DAG.getMachineNode(
PPC::RLWIMI, dl, MVT::i32,
{Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
DAG.getTargetConstant(MB, dl, MVT::i32),
DAG.getTargetConstant(ME, dl, MVT::i32)}),
0);
}
case Intrinsic::ppc_rlwnm: {
if (Op.getConstantOperandVal(3) == 0)
return DAG.getConstant(0, dl, MVT::i32);
unsigned MB = 0, ME = 0;
if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
report_fatal_error("invalid rlwnm mask!");
return SDValue(
DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
{Op.getOperand(1), Op.getOperand(2),
DAG.getTargetConstant(MB, dl, MVT::i32),
DAG.getTargetConstant(ME, dl, MVT::i32)}),
0);
}
case Intrinsic::ppc_mma_disassemble_acc: {
if (Subtarget.isISAFuture()) {
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
SDValue WideVec =
SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
Op.getOperand(1)),
0);
SmallVector<SDValue, 4> RetOps;
SDValue Value = SDValue(WideVec.getNode(), 0);
SDValue Value2 = SDValue(WideVec.getNode(), 1);
SDValue Extract;
Extract = DAG.getNode(
PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
Subtarget.isLittleEndian() ? Value2 : Value,
DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
dl, getPointerTy(DAG.getDataLayout())));
RetOps.push_back(Extract);
Extract = DAG.getNode(
PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
Subtarget.isLittleEndian() ? Value2 : Value,
DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
dl, getPointerTy(DAG.getDataLayout())));
RetOps.push_back(Extract);
Extract = DAG.getNode(
PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
Subtarget.isLittleEndian() ? Value : Value2,
DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
dl, getPointerTy(DAG.getDataLayout())));
RetOps.push_back(Extract);
Extract = DAG.getNode(
PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
Subtarget.isLittleEndian() ? Value : Value2,
DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
dl, getPointerTy(DAG.getDataLayout())));
RetOps.push_back(Extract);
return DAG.getMergeValues(RetOps, dl);
}
[[fallthrough]];
}
case Intrinsic::ppc_vsx_disassemble_pair: {
int NumVecs = 2;
SDValue WideVec = Op.getOperand(1);
if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
NumVecs = 4;
WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
}
SmallVector<SDValue, 4> RetOps;
for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
SDValue Extract = DAG.getNode(
PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
: VecNo,
dl, getPointerTy(DAG.getDataLayout())));
RetOps.push_back(Extract);
}
return DAG.getMergeValues(RetOps, dl);
}
case Intrinsic::ppc_mma_xxmfacc:
case Intrinsic::ppc_mma_xxmtacc: {
// Allow pre-isa-future subtargets to lower as normal.
if (!Subtarget.isISAFuture())
return SDValue();
// The intrinsics for xxmtacc and xxmfacc take one argument of
// type v512i1. For future CPUs the corresponding wacc instruction
// dmxx[inst|extf]dmr512 is always generated for type v512i1, removing
// the need to produce the xxm[t|f]acc.
SDValue WideVec = Op.getOperand(1);
DAG.ReplaceAllUsesWith(Op, WideVec);
return SDValue();
}
case Intrinsic::ppc_unpack_longdouble: {
auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
"Argument of long double unpack must be 0 or 1!");
return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
DAG.getConstant(!!(Idx->getSExtValue()), dl,
Idx->getValueType(0)));
}
case Intrinsic::ppc_compare_exp_lt:
case Intrinsic::ppc_compare_exp_gt:
case Intrinsic::ppc_compare_exp_eq:
case Intrinsic::ppc_compare_exp_uo: {
unsigned Pred;
switch (IntrinsicID) {
case Intrinsic::ppc_compare_exp_lt:
Pred = PPC::PRED_LT;
break;
case Intrinsic::ppc_compare_exp_gt:
Pred = PPC::PRED_GT;
break;
case Intrinsic::ppc_compare_exp_eq:
Pred = PPC::PRED_EQ;
break;
case Intrinsic::ppc_compare_exp_uo:
Pred = PPC::PRED_UN;
break;
}
return SDValue(
DAG.getMachineNode(
PPC::SELECT_CC_I4, dl, MVT::i32,
{SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
Op.getOperand(1), Op.getOperand(2)),
0),
DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
DAG.getTargetConstant(Pred, dl, MVT::i32)}),
0);
}
case Intrinsic::ppc_test_data_class: {
EVT OpVT = Op.getOperand(1).getValueType();
unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
: (OpVT == MVT::f64 ? PPC::XSTSTDCDP
: PPC::XSTSTDCSP);
return SDValue(
DAG.getMachineNode(
PPC::SELECT_CC_I4, dl, MVT::i32,
{SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
Op.getOperand(1)),
0),
DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
0);
}
case Intrinsic::ppc_fnmsub: {
EVT VT = Op.getOperand(1).getValueType();
if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
return DAG.getNode(
ISD::FNEG, dl, VT,
DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
case Intrinsic::ppc_convert_f128_to_ppcf128:
case Intrinsic::ppc_convert_ppcf128_to_f128: {
RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
? RTLIB::CONVERT_PPCF128_F128
: RTLIB::CONVERT_F128_PPCF128;
MakeLibCallOptions CallOptions;
std::pair<SDValue, SDValue> Result =
makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
dl, SDValue());
return Result.first;
}
case Intrinsic::ppc_maxfe:
case Intrinsic::ppc_maxfl:
case Intrinsic::ppc_maxfs:
case Intrinsic::ppc_minfe:
case Intrinsic::ppc_minfl:
case Intrinsic::ppc_minfs: {
EVT VT = Op.getValueType();
assert(
all_of(Op->ops().drop_front(4),
[VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
"ppc_[max|min]f[e|l|s] must have uniform type arguments");
(void)VT;
ISD::CondCode CC = ISD::SETGT;
if (IntrinsicID == Intrinsic::ppc_minfe ||
IntrinsicID == Intrinsic::ppc_minfl ||
IntrinsicID == Intrinsic::ppc_minfs)
CC = ISD::SETLT;
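// Reduce the value operands (operand 0 is the intrinsic ID) pairwise with
// select_cc nodes, starting from the second-to-last operand and wrapping
// around past the ID to reach the last operand.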
unsigned I = Op.getNumOperands() - 2, Cnt = I;
SDValue Res = Op.getOperand(I);
for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
Res =
DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
}
return Res;
}
}
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
int CompareOpc;
bool isDot;
if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
if (!isDot) {
SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(CompareOpc, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
}
// Create the PPCISD altivec 'dot' comparison node.
SDValue Ops[] = {
Op.getOperand(2), // LHS
Op.getOperand(3), // RHS
DAG.getConstant(CompareOpc, dl, MVT::i32)
};
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
DAG.getRegister(PPC::CR6, MVT::i32),
CompNode.getValue(1));
// Unpack the result based on how the target uses it.
unsigned BitNo; // Bit # of CR6.
bool InvertBit; // Invert result?
switch (Op.getConstantOperandVal(1)) {
default: // Can't happen, don't crash on invalid number though.
case 0: // Return the value of the EQ bit of CR6.
BitNo = 0; InvertBit = false;
break;
case 1: // Return the inverted value of the EQ bit of CR6.
BitNo = 0; InvertBit = true;
break;
case 2: // Return the value of the LT bit of CR6.
BitNo = 2; InvertBit = false;
break;
case 3: // Return the inverted value of the LT bit of CR6.
BitNo = 2; InvertBit = true;
break;
}
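// After MFOCRF, the CR6 field occupies bits 4-7 of the GPR (SO at bit 4,
// EQ at bit 5, GT at bit 6, LT at bit 7), so 8 - (3 - BitNo) is the shift
// amount that brings the requested bit down to bit 0.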
// Shift the bit into the low position.
Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
// Isolate the bit.
Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
DAG.getConstant(1, dl, MVT::i32));
// If we are supposed to, toggle the bit.
if (InvertBit)
Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
DAG.getConstant(1, dl, MVT::i32));
return Flags;
}
SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
// SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
// the beginning of the argument list.
int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
SDLoc DL(Op);
switch (Op.getConstantOperandVal(ArgStart)) {
case Intrinsic::ppc_cfence: {
assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
SDValue Val = Op.getOperand(ArgStart + 1);
EVT Ty = Val.getValueType();
if (Ty == MVT::i128) {
// FIXME: Testing one of two paired registers is sufficient to guarantee
// ordering?
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
}
unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
return SDValue(
DAG.getMachineNode(Opcode, DL, MVT::Other,
DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
Op.getOperand(0)),
0);
}
default:
break;
}
return SDValue();
}
// Lower scalar BSWAP64 to xxbrd.
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (!Subtarget.isPPC64())
return Op;
// MTVSRDD
Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
Op.getOperand(0));
// XXBRD
Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
// MFVSRD
int VectorIndex = 0;
if (Subtarget.isLittleEndian())
VectorIndex = 1;
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
return Op;
}
// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
// compared to a value that is atomically loaded (atomic loads zero-extend).
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
"Expecting an atomic compare-and-swap here.");
SDLoc dl(Op);
auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
EVT MemVT = AtomicNode->getMemoryVT();
if (MemVT.getSizeInBits() >= 32)
return Op;
SDValue CmpOp = Op.getOperand(2);
// If this is already correctly zero-extended, leave it alone.
auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
if (DAG.MaskedValueIsZero(CmpOp, HighBits))
return Op;
// Clear the high bits of the compare operand.
unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
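// For example, MaskVal is 0xFF for i8 and 0xFFFF for i16.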
SDValue NewCmpOp =
DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
DAG.getConstant(MaskVal, dl, MVT::i32));
// Replace the existing compare operand with the properly zero-extended one.
SmallVector<SDValue, 4> Ops;
for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
Ops.push_back(AtomicNode->getOperand(i));
Ops[2] = NewCmpOp;
MachineMemOperand *MMO = AtomicNode->getMemOperand();
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
auto NodeTy =
(MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
SelectionDAG &DAG) const {
AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
EVT MemVT = N->getMemoryVT();
assert(MemVT.getSimpleVT() == MVT::i128 &&
"Expect quadword atomic operations");
SDLoc dl(N);
unsigned Opc = N->getOpcode();
switch (Opc) {
case ISD::ATOMIC_LOAD: {
// Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
// lowered to PPC instructions by the pattern-matching instruction selector.
SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
SmallVector<SDValue, 4> Ops{
N->getOperand(0),
DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
for (int I = 1, E = N->getNumOperands(); I < E; ++I)
Ops.push_back(N->getOperand(I));
SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
Ops, MemVT, N->getMemOperand());
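// Recombine the two i64 halves returned by the intrinsic into a single
// i128 value: lo | (hi << 64).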
SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
SDValue ValHi =
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
DAG.getConstant(64, dl, MVT::i32));
SDValue Val =
DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
{Val, LoadedVal.getValue(2)});
}
case ISD::ATOMIC_STORE: {
// Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
// lowered to PPC instructions by the pattern-matching instruction selector.
SDVTList Tys = DAG.getVTList(MVT::Other);
SmallVector<SDValue, 4> Ops{
N->getOperand(0),
DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
SDValue Val = N->getOperand(1);
SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
DAG.getConstant(64, dl, MVT::i32));
ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
Ops.push_back(ValLo);
Ops.push_back(ValHi);
Ops.push_back(N->getOperand(2));
return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
N->getMemOperand());
}
default:
llvm_unreachable("Unexpected atomic opcode");
}
}
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
SelectionDAG &DAG,
const PPCSubtarget &Subtarget) {
assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
enum DataClassMask {
DC_NAN = 1 << 6,
DC_NEG_INF = 1 << 4,
DC_POS_INF = 1 << 5,
DC_NEG_ZERO = 1 << 2,
DC_POS_ZERO = 1 << 3,
DC_NEG_SUBNORM = 1,
DC_POS_SUBNORM = 1 << 1,
};
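// The bit layout above matches the 7-bit DCMX operand of the
// xststdc[sp|dp|qp] test instructions.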
EVT VT = Op.getValueType();
unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
: VT == MVT::f64 ? PPC::XSTSTDCDP
: PPC::XSTSTDCSP;
if (Mask == fcAllFlags)
return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
if (Mask == 0)
return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
// Sometimes it is cheaper or necessary to test the inverse set of flags and
// negate the result.
if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
return DAG.getNOT(Dl, Rev, MVT::i1);
}
// Power doesn't support testing whether a value is 'normal'. Test the rest
// first, then check that it is 'not not-normal' with the expected sign.
if (Mask & fcNormal) {
SDValue Rev(DAG.getMachineNode(
TestOp, Dl, MVT::i32,
DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
DC_NEG_ZERO | DC_POS_ZERO |
DC_NEG_SUBNORM | DC_POS_SUBNORM,
Dl, MVT::i32),
Op),
0);
// The sign is stored in CR bit 0; the result is in CR bit 2.
SDValue Sign(
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
0);
SDValue Normal(DAG.getNOT(
Dl,
SDValue(DAG.getMachineNode(
TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
0),
MVT::i1));
if (Mask & fcPosNormal)
Sign = DAG.getNOT(Dl, Sign, MVT::i1);
SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
if (Mask == fcPosNormal || Mask == fcNegNormal)
return Result;
return DAG.getNode(
ISD::OR, Dl, MVT::i1,
getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
}
// The instruction doesn't differentiate between signaling or quiet NaN. Test
// the rest first, and test if it 'is NaN and is signaling/quiet'.
if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
bool IsQuiet = Mask & fcQNan;
SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
// Quietness is determined by the first bit of the fraction field.
uint64_t QuietMask = 0;
SDValue HighWord;
if (VT == MVT::f128) {
HighWord = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
QuietMask = 0x8000;
} else if (VT == MVT::f64) {
if (Subtarget.isPPC64()) {
HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
DAG.getBitcast(MVT::i64, Op),
DAG.getConstant(1, Dl, MVT::i32));
} else {
SDValue Vec = DAG.getBitcast(
MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
HighWord = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
}
QuietMask = 0x80000;
} else if (VT == MVT::f32) {
HighWord = DAG.getBitcast(MVT::i32, Op);
QuietMask = 0x400000;
}
SDValue NanRes = DAG.getSetCC(
Dl, MVT::i1,
DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
DAG.getConstant(QuietMask, Dl, MVT::i32)),
DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
if (Mask == fcQNan || Mask == fcSNan)
return NanRes;
return DAG.getNode(ISD::OR, Dl, MVT::i1,
getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
NanRes);
}
unsigned NativeMask = 0;
if ((Mask & fcNan) == fcNan)
NativeMask |= DC_NAN;
if (Mask & fcNegInf)
NativeMask |= DC_NEG_INF;
if (Mask & fcPosInf)
NativeMask |= DC_POS_INF;
if (Mask & fcNegZero)
NativeMask |= DC_NEG_ZERO;
if (Mask & fcPosZero)
NativeMask |= DC_POS_ZERO;
if (Mask & fcNegSubnormal)
NativeMask |= DC_NEG_SUBNORM;
if (Mask & fcPosSubnormal)
NativeMask |= DC_POS_SUBNORM;
return SDValue(
DAG.getMachineNode(
TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
SDValue(DAG.getMachineNode(
TestOp, Dl, MVT::i32,
DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
0),
DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
0);
}
SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
SDValue LHS = Op.getOperand(0);
uint64_t RHSC = Op.getConstantOperandVal(1);
SDLoc Dl(Op);
FPClassTest Category = static_cast<FPClassTest>(RHSC);
return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
}
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
// Store the input value into Value#0 of the stack slot.
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
MachinePointerInfo());
// Load it out.
return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
"Should only be called for ISD::INSERT_VECTOR_ELT");
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VT = Op.getValueType();
SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
if (VT == MVT::v2f64 && C)
return Op;
if (Subtarget.hasP9Vector()) {
// An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
// because on P10, it allows this specific insert_vector_elt load pattern to
// utilize the refactored load and store infrastructure in order to exploit
// prefixed loads.
// On targets with inexpensive direct moves (Power9 and up), a
// (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
// load since a single precision load will involve conversion to double
// precision on the load followed by another conversion to single precision.
if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
(isa<LoadSDNode>(V2))) {
SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
SDValue InsVecElt =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
BitcastLoad, Op.getOperand(2));
return DAG.getBitcast(MVT::v4f32, InsVecElt);
}
}
if (Subtarget.isISA3_1()) {
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
return SDValue();
// On P10, we have legal lowering for constant and variable indices for
// all vectors.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
return Op;
}
// Before P10, we have legal lowering for constant indices but not for
// variable ones.
if (!C)
return SDValue();
// We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
unsigned InsertAtElement = C->getZExtValue();
unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
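// PPCISD::VECINSERT byte positions are relative to big-endian element
// order, so mirror the byte offset for little endian targets.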
if (Subtarget.isLittleEndian()) {
InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
}
return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
return Op;
}
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
SDValue LoadChain = LN->getChain();
SDValue BasePtr = LN->getBasePtr();
EVT VT = Op.getValueType();
if (VT != MVT::v256i1 && VT != MVT::v512i1)
return Op;
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
// Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
// 2 or 4 vsx registers.
assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
"Type unsupported without MMA");
assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
"Type unsupported without paired vector support");
Align Alignment = LN->getAlign();
SmallVector<SDValue, 4> Loads;
SmallVector<SDValue, 4> LoadChains;
unsigned NumVecs = VT.getSizeInBits() / 128;
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
SDValue Load =
DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
LN->getPointerInfo().getWithOffset(Idx * 16),
commonAlignment(Alignment, Idx * 16),
LN->getMemOperand()->getFlags(), LN->getAAInfo());
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(16, dl, BasePtr.getValueType()));
Loads.push_back(Load);
LoadChains.push_back(Load.getValue(1));
}
if (Subtarget.isLittleEndian()) {
std::reverse(Loads.begin(), Loads.end());
std::reverse(LoadChains.begin(), LoadChains.end());
}
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
SDValue Value =
DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
dl, VT, Loads);
SDValue RetOps[] = {Value, TF};
return DAG.getMergeValues(RetOps, dl);
}
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
SDValue StoreChain = SN->getChain();
SDValue BasePtr = SN->getBasePtr();
SDValue Value = SN->getValue();
SDValue Value2 = SN->getValue();
EVT StoreVT = Value.getValueType();
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
return Op;
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
// Here we create 2 or 4 v16i8 stores to store the pair's or accumulator's
// underlying registers individually.
assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
"Type unsupported without MMA");
assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
"Type unsupported without paired vector support");
Align Alignment = SN->getAlign();
SmallVector<SDValue, 4> Stores;
unsigned NumVecs = 2;
if (StoreVT == MVT::v512i1) {
if (Subtarget.isISAFuture()) {
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
MachineSDNode *ExtNode = DAG.getMachineNode(
PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
Value = SDValue(ExtNode, 0);
Value2 = SDValue(ExtNode, 1);
} else
Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
NumVecs = 4;
}
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
SDValue Elt;
if (Subtarget.isISAFuture()) {
VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
Idx > 1 ? Value2 : Value,
DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
} else
Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
SDValue Store =
DAG.getStore(StoreChain, dl, Elt, BasePtr,
SN->getPointerInfo().getWithOffset(Idx * 16),
commonAlignment(Alignment, Idx * 16),
SN->getMemOperand()->getFlags(), SN->getAAInfo());
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(16, dl, BasePtr.getValueType()));
Stores.push_back(Store);
}
SDValue TF = DAG.getTokenFactor(dl, Stores);
return TF;
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (Op.getValueType() == MVT::v4i32) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
// Rotate amounts are taken modulo 32, so a splat of -16 acts as +16.
SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
SDValue RHSSwap = // = vrlw RHS, 16
BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
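// Per 32-bit lane the truncated product is
// lo(L)*lo(R) + ((lo(L)*hi(R) + hi(L)*lo(R)) << 16); vmulouh computes the
// first term and vmsumuhm with the rotated RHS computes the cross terms.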
// Shrinkify inputs to v8i16.
LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
// Low parts multiplied together, generating 32-bit results (we ignore the
// top parts).
SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
LHS, RHS, DAG, dl, MVT::v4i32);
SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
// Shift the high parts up 16 bits.
HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
Neg16, DAG, dl);
return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
} else if (Op.getValueType() == MVT::v16i8) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
bool isLittleEndian = Subtarget.isLittleEndian();
// Multiply the even 8-bit parts, producing 16-bit sums.
SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
LHS, RHS, DAG, dl, MVT::v8i16);
EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
// Multiply the odd 8-bit parts, producing 16-bit sums.
SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
LHS, RHS, DAG, dl, MVT::v8i16);
OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
// Merge the results together. Because vmuleub and vmuloub are
// instructions with a big-endian bias, we must reverse the
// element numbering and reverse the meaning of "odd" and "even"
// when generating little endian code.
int Ops[16];
for (unsigned i = 0; i != 8; ++i) {
if (isLittleEndian) {
Ops[i*2 ] = 2*i;
Ops[i*2+1] = 2*i+16;
} else {
Ops[i*2 ] = 2*i+1;
Ops[i*2+1] = 2*i+1+16;
}
}
if (isLittleEndian)
return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
else
return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
} else {
llvm_unreachable("Unknown mul to lower!");
}
}
SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
!Subtarget.hasP9Vector())
return SDValue();
return Op;
}
// Custom lowering for fpext v2f32 to v2f64
SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::FP_EXTEND &&
"Should only be called for ISD::FP_EXTEND");
// FIXME: handle extends from half precision float vectors on P9.
// We only want to custom lower an extend from v2f32 to v2f64.
if (Op.getValueType() != MVT::v2f64 ||
Op.getOperand(0).getValueType() != MVT::v2f32)
return SDValue();
SDLoc dl(Op);
SDValue Op0 = Op.getOperand(0);
switch (Op0.getOpcode()) {
default:
return SDValue();
case ISD::EXTRACT_SUBVECTOR: {
assert(Op0.getNumOperands() == 2 &&
isa<ConstantSDNode>(Op0->getOperand(1)) &&
"Node should have 2 operands with second one being a constant!");
if (Op0.getOperand(0).getValueType() != MVT::v4f32)
return SDValue();
// Custom lower is only done for high or low doubleword.
int Idx = Op0.getConstantOperandVal(1);
if (Idx % 2 != 0)
return SDValue();
// Since input is v4f32, at this point Idx is either 0 or 2.
// Shift to get the doubleword position we want.
int DWord = Idx >> 1;
// High and low word positions are different on little endian.
if (Subtarget.isLittleEndian())
DWord ^= 0x1;
return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
}
case ISD::FADD:
case ISD::FMUL:
case ISD::FSUB: {
SDValue NewLoad[2];
for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
// Ensure both inputs are loads.
SDValue LdOp = Op0.getOperand(i);
if (LdOp.getOpcode() != ISD::LOAD)
return SDValue();
// Generate new load node.
LoadSDNode *LD = cast<LoadSDNode>(LdOp);
SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
NewLoad[i] = DAG.getMemIntrinsicNode(
PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
LD->getMemoryVT(), LD->getMemOperand());
}
SDValue NewOp =
DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
NewLoad[1], Op0.getNode()->getFlags());
return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
DAG.getConstant(0, dl, MVT::i32));
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op0);
SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
SDValue NewLd = DAG.getMemIntrinsicNode(
PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
LD->getMemoryVT(), LD->getMemOperand());
return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
DAG.getConstant(0, dl, MVT::i32));
}
}
llvm_unreachable("ERROR:Should return for all cases within swtich.");
}
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
case ISD::FPOW: return lowerPow(Op, DAG);
case ISD::FSIN: return lowerSin(Op, DAG);
case ISD::FCOS: return lowerCos(Op, DAG);
case ISD::FLOG: return lowerLog(Op, DAG);
case ISD::FLOG10: return lowerLog10(Op, DAG);
case ISD::FEXP: return lowerExp(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::INLINEASM:
case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
// Variable argument lowering.
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::GET_DYNAMIC_AREA_OFFSET:
return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
// Exception handling lowering.
case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
// Lower 64-bit shifts.
case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
case ISD::FSHL: return LowerFunnelShift(Op, DAG);
case ISD::FSHR: return LowerFunnelShift(Op, DAG);
// Vector-related lowering.
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND:
return LowerFP_ROUND(Op, DAG);
case ISD::ROTL: return LowerROTL(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
case ISD::BITCAST: return LowerBITCAST(Op, DAG);
// Frame & Return address.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::INTRINSIC_VOID:
return LowerINTRINSIC_VOID(Op, DAG);
case ISD::BSWAP:
return LowerBSWAP(Op, DAG);
case ISD::ATOMIC_CMP_SWAP:
return LowerATOMIC_CMP_SWAP(Op, DAG);
case ISD::ATOMIC_STORE:
return LowerATOMIC_LOAD_STORE(Op, DAG);
case ISD::IS_FPCLASS:
return LowerIS_FPCLASS(Op, DAG);
}
}
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
case ISD::ATOMIC_LOAD: {
SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
Results.push_back(Res);
Results.push_back(Res.getValue(1));
break;
}
case ISD::READCYCLECOUNTER: {
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
Results.push_back(
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
Results.push_back(RTB.getValue(2));
break;
}
case ISD::INTRINSIC_W_CHAIN: {
if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
break;
assert(N->getValueType(0) == MVT::i1 &&
"Unexpected result type for CTR decrement intrinsic");
EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
N->getValueType(0));
SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
N->getOperand(1));
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
Results.push_back(NewInt.getValue(1));
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
switch (N->getConstantOperandVal(0)) {
case Intrinsic::ppc_pack_longdouble:
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
N->getOperand(2), N->getOperand(1)));
break;
case Intrinsic::ppc_maxfe:
case Intrinsic::ppc_minfe:
case Intrinsic::ppc_fnmsub:
case Intrinsic::ppc_convert_f128_to_ppcf128:
Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
break;
}
break;
}
case ISD::VAARG: {
if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
return;
EVT VT = N->getValueType(0);
if (VT == MVT::i64) {
SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
Results.push_back(NewNode);
Results.push_back(NewNode.getValue(1));
}
return;
}
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
// LowerFP_TO_INT() can only handle f32 and f64.
if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
MVT::ppcf128)
return;
SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
Results.push_back(LoweredValue);
if (N->isStrictFPOpcode())
Results.push_back(LoweredValue.getValue(1));
return;
}
case ISD::TRUNCATE: {
if (!N->getValueType(0).isVector())
return;
SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
if (Lowered)
Results.push_back(Lowered);
return;
}
case ISD::FSHL:
case ISD::FSHR:
// Don't handle funnel shifts here.
return;
case ISD::BITCAST:
// Don't handle bitcast here.
return;
case ISD::FP_EXTEND:
SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
if (Lowered)
Results.push_back(Lowered);
return;
}
}
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Function *Func = Intrinsic::getDeclaration(M, Id);
return Builder.CreateCall(Func, {});
}
// The mappings for emitLeadingFence/emitTrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
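// Illustrative subset of that mapping (a sketch; the cfence pseudo used
// below models the control-dependency + isync sequence after atomic loads):
//   seq_cst store:  sync; st           release store:  lwsync; st
//   seq_cst load:   sync; ld; cfence   acquire load:   ld; cfence
//   acq_rel rmw:    lwsync; ll/sc loop; lwsync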
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
if (Ord == AtomicOrdering::SequentiallyConsistent)
return callIntrinsic(Builder, Intrinsic::ppc_sync);
if (isReleaseOrStronger(Ord))
return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
return nullptr;
}
Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
// and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
if (isa<LoadInst>(Inst))
return Builder.CreateCall(
Intrinsic::getDeclaration(
Builder.GetInsertBlock()->getParent()->getParent(),
Intrinsic::ppc_cfence, {Inst->getType()}),
{Inst});
// FIXME: Can use isync for rmw operation.
return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
}
return nullptr;
}
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
unsigned AtomicSize,
unsigned BinOpcode,
unsigned CmpOpcode,
unsigned CmpPred) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
auto LoadMnemonic = PPC::LDARX;
auto StoreMnemonic = PPC::STDCX;
switch (AtomicSize) {
default:
llvm_unreachable("Unexpected size of atomic entity");
case 1:
LoadMnemonic = PPC::LBARX;
StoreMnemonic = PPC::STBCX;
assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
break;
case 2:
LoadMnemonic = PPC::LHARX;
StoreMnemonic = PPC::STHCX;
assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
break;
case 4:
LoadMnemonic = PPC::LWARX;
StoreMnemonic = PPC::STWCX;
break;
case 8:
LoadMnemonic = PPC::LDARX;
StoreMnemonic = PPC::STDCX;
break;
}
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = ++BB->getIterator();
Register dest = MI.getOperand(0).getReg();
Register ptrA = MI.getOperand(1).getReg();
Register ptrB = MI.getOperand(2).getReg();
Register incr = MI.getOperand(3).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB =
CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
if (CmpOpcode)
F->insert(It, loop2MBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register TmpReg = (!BinOpcode) ? incr :
RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loopMBB);
// loopMBB:
// l[wd]arx dest, ptr
// add r0, dest, incr
// st[wd]cx. r0, ptr
// bne- loopMBB
// fallthrough --> exitMBB
// For max/min...
// loopMBB:
// l[wd]arx dest, ptr
// cmpl?[wd] dest, incr
// bgt exitMBB
// loop2MBB:
// st[wd]cx. dest, ptr
// bne- loopMBB
// fallthrough --> exitMBB
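// For example, EmitAtomicBinary(MI, BB, 4, PPC::ADD4) (an i32 fetch-add)
// expands to:
//   loop: lwarx dest, ptr
//         add tmp, incr, dest
//         stwcx. tmp, ptr
//         bne- loop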
BB = loopMBB;
BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
if (CmpOpcode) {
Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
// Signed comparisons of byte or halfword values must be sign-extended.
if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
ExtReg).addReg(dest);
BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
} else
BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(CmpPred)
.addReg(CrReg)
.addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
BB = loop2MBB;
}
BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
return BB;
}
static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
switch(MI.getOpcode()) {
default:
return false;
case PPC::COPY:
return TII->isSignExtended(MI.getOperand(1).getReg(),
&MI.getMF()->getRegInfo());
case PPC::LHA:
case PPC::LHA8:
case PPC::LHAU:
case PPC::LHAU8:
case PPC::LHAUX:
case PPC::LHAUX8:
case PPC::LHAX:
case PPC::LHAX8:
case PPC::LWA:
case PPC::LWAUX:
case PPC::LWAX:
case PPC::LWAX_32:
case PPC::LWA_32:
case PPC::PLHA:
case PPC::PLHA8:
case PPC::PLHA8pc:
case PPC::PLHApc:
case PPC::PLWA:
case PPC::PLWA8:
case PPC::PLWA8pc:
case PPC::PLWApc:
case PPC::EXTSB:
case PPC::EXTSB8:
case PPC::EXTSB8_32_64:
case PPC::EXTSB8_rec:
case PPC::EXTSB_rec:
case PPC::EXTSH:
case PPC::EXTSH8:
case PPC::EXTSH8_32_64:
case PPC::EXTSH8_rec:
case PPC::EXTSH_rec:
case PPC::EXTSW:
case PPC::EXTSWSLI:
case PPC::EXTSWSLI_32_64:
case PPC::EXTSWSLI_32_64_rec:
case PPC::EXTSWSLI_rec:
case PPC::EXTSW_32:
case PPC::EXTSW_32_64:
case PPC::EXTSW_32_64_rec:
case PPC::EXTSW_rec:
case PPC::SRAW:
case PPC::SRAWI:
case PPC::SRAWI_rec:
case PPC::SRAW_rec:
return true;
}
return false;
}
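// For example, a value produced by lha or extsh is already sign-extended,
// so the signed-compare path below can feed it to cmpw without inserting
// another extsb/extsh.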
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
MachineInstr &MI, MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const PPCInstrInfo *TII = Subtarget.getInstrInfo();
// If this is a signed comparison and the value being compared is not known
// to be sign extended, sign extend it here.
DebugLoc dl = MI.getDebugLoc();
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register incr = MI.getOperand(3).getReg();
bool IsSignExtended =
incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
.addReg(MI.getOperand(3).getReg());
MI.getOperand(3).setReg(ValueReg);
incr = ValueReg;
}
// If we support part-word atomic mnemonics, just use them.
if (Subtarget.hasPartwordAtomics())
return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
CmpPred);
// In 64-bit mode we have to use 64 bits for addresses, even though the
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = Subtarget.isPPC64();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
Register dest = MI.getOperand(0).getReg();
Register ptrA = MI.getOperand(1).getReg();
Register ptrB = MI.getOperand(2).getReg();
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB =
CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
if (CmpOpcode)
F->insert(It, loop2MBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
const TargetRegisterClass *RC =
is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
Register PtrReg = RegInfo.createVirtualRegister(RC);
Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
Register ShiftReg =
isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
Register MaskReg = RegInfo.createVirtualRegister(GPRC);
Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
Register Ptr1Reg;
Register TmpReg =
(!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loopMBB);
// The 4-byte load must be aligned, while a char or short may be
// anywhere in the word. Hence all this nasty bookkeeping code.
// add ptr1, ptrA, ptrB [copy if ptrA==0]
// rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
// xori shift, shift1, 24 [16]
// rlwinm ptr, ptr1, 0, 0, 29
// slw incr2, incr, shift
// li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
// slw mask, mask2, shift
// loopMBB:
// lwarx tmpDest, ptr
// add tmp, tmpDest, incr2
// andc tmp2, tmpDest, mask
// and tmp3, tmp, mask
// or tmp4, tmp3, tmp2
// stwcx. tmp4, ptr
// bne- loopMBB
// fallthrough --> exitMBB
// srw SrwDest, tmpDest, shift
// rlwinm SrwDest, SrwDest, 0, 24 [16], 31
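// Worked example (sketch): an 8-bit operand at byte offset 2 of an
// aligned word gives shift1 = (ptr & 3) << 3 = 16; on big-endian
// shift = shift1 ^ 24 = 8 (byte 2 occupies bits 15:8), while on
// little-endian shift = shift1 = 16 (byte 2 occupies bits 23:16).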
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
.addReg(ptrA)
.addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
// We need to use a 32-bit subregister to avoid a register class mismatch in
// 64-bit mode.
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
.addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
.addImm(3)
.addImm(27)
.addImm(is8bit ? 28 : 27);
if (!isLittleEndian)
BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
.addReg(Shift1Reg)
.addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg)
.addImm(0)
.addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
.addReg(Ptr1Reg)
.addImm(0)
.addImm(0)
.addImm(29);
BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
.addReg(Mask3Reg)
.addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
.addReg(Mask2Reg)
.addReg(ShiftReg);
BB = loopMBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
.addReg(ZeroReg)
.addReg(PtrReg);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
.addReg(Incr2Reg)
.addReg(TmpDestReg);
BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
.addReg(TmpDestReg)
.addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
if (CmpOpcode) {
// For unsigned comparisons, we can directly compare the shifted values.
// For signed comparisons we shift and sign extend.
Register SReg = RegInfo.createVirtualRegister(GPRC);
Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(BB, dl, TII->get(PPC::AND), SReg)
.addReg(TmpDestReg)
.addReg(MaskReg);
unsigned ValueReg = SReg;
unsigned CmpReg = Incr2Reg;
if (CmpOpcode == PPC::CMPW) {
ValueReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
.addReg(SReg)
.addReg(ShiftReg);
Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
.addReg(ValueReg);
ValueReg = ValueSReg;
CmpReg = incr;
}
BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(CmpPred)
.addReg(CrReg)
.addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
BB = loop2MBB;
}
BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
BuildMI(BB, dl, TII->get(PPC::STWCX))
.addReg(Tmp4Reg)
.addReg(ZeroReg)
.addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(PPC::CR0)
.addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
// Since the shift amount is not a constant, we need to clear
// the upper bits with a separate RLWINM.
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
.addReg(SrwDestReg)
.addImm(0)
.addImm(is8bit ? 24 : 16)
.addImm(31);
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
.addReg(TmpDestReg)
.addReg(ShiftReg);
return BB;
}
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
Register DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
Register mainDstReg = MRI.createVirtualRegister(RC);
Register restoreDstReg = MRI.createVirtualRegister(RC);
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
// For v = setjmp(buf), we generate
//
// thisMBB:
// SjLjSetup mainMBB
// bl mainMBB
// v_restore = 1
// b sinkMBB
//
// mainMBB:
// buf[LabelOffset] = LR
// v_main = 0
//
// sinkMBB:
// v = phi(main, restore)
//
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MachineInstrBuilder MIB;
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// Note that the structure of the jmp_buf used here is not compatible
// with that used by libc, and is not designed to be. Specifically, it
// stores only those 'reserved' registers that LLVM does not otherwise
// understand how to spill. Also, by convention, by the time this
// intrinsic is called, Clang has already stored the frame address in the
// first slot of the buffer and stack address in the third. Following the
// X86 target code, we'll store the jump address in the second slot. We also
// need to save the TOC pointer (R2) to handle jumps between shared
// libraries, and that will be stored in the fourth slot. The thread
// identifier (R13) is not affected.
// thisMBB:
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
const int64_t BPOffset = 4 * PVT.getStoreSize();
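// Resulting jmp_buf layout (one slot = PVT.getStoreSize() bytes; a sketch
// summarizing the comment above):
//   slot 0: frame address      (stored by Clang)
//   slot 1: resume IP          (LabelOffset)
//   slot 2: stack pointer      (stored by Clang; reloaded in longjmp)
//   slot 3: TOC pointer (R2)   (TOCOffset)
//   slot 4: base pointer       (BPOffset)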
// Prepare the IP in a register.
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
Register LabelReg = MRI.createVirtualRegister(PtrRC);
Register BufReg = MI.getOperand(1).getReg();
if (Subtarget.is64BitELFABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg)
.cloneMemRefs(MI);
}
// Naked functions never have a base pointer, and so we use r1. For all
// other functions, this decision must be deferred until PEI.
unsigned BaseReg;
if (MF->getFunction().hasFnAttribute(Attribute::Naked))
BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
else
BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
MIB = BuildMI(*thisMBB, MI, DL,
TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
.addReg(BufReg)
.cloneMemRefs(MI);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
.addMBB(mainMBB);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
// mainMBB:
// mainDstReg = 0
MIB =
BuildMI(mainMBB, DL,
TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
// Store IP
if (Subtarget.isPPC64()) {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
.addReg(LabelReg)
.addImm(LabelOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
.addReg(LabelReg)
.addImm(LabelOffset)
.addReg(BufReg);
}
MIB.cloneMemRefs(MI);
BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(PPC::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(restoreDstReg).addMBB(thisMBB);
MI.eraseFromParent();
return sinkMBB;
}
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
const TargetRegisterClass *RC =
(PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
Register Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as a GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
unsigned BP =
(PVT == MVT::i64)
? PPC::X30
: (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
: PPC::R30);
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
const int64_t BPOffset = 4 * PVT.getStoreSize();
Register BufReg = MI.getOperand(0).getReg();
// Reload FP (the jumped-to function may not have had a
// frame pointer, and if so, then its r31 will be restored
// as necessary).
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
.addImm(0)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
.addImm(0)
.addReg(BufReg);
}
MIB.cloneMemRefs(MI);
// Reload IP
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
.addImm(LabelOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
.addImm(LabelOffset)
.addReg(BufReg);
}
MIB.cloneMemRefs(MI);
// Reload SP
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
.addImm(SPOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
.addImm(SPOffset)
.addReg(BufReg);
}
MIB.cloneMemRefs(MI);
// Reload BP
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
.addImm(BPOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
.addImm(BPOffset)
.addReg(BufReg);
}
MIB.cloneMemRefs(MI);
// Reload TOC
if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg)
.cloneMemRefs(MI);
}
// Jump
BuildMI(*MBB, MI, DL,
TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
MI.eraseFromParent();
return MBB;
}
bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
// If the function specifically requests inline stack probes, emit them.
if (MF.getFunction().hasFnAttribute("probe-stack"))
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
"inline-asm";
return false;
}
unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
unsigned StackAlign = TFI->getStackAlignment();
assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
"Unexpected stack alignment");
// The default stack probe size is 4096 if the function has no
// stack-probe-size attribute.
const Function &Fn = MF.getFunction();
unsigned StackProbeSize =
Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
// Round down to the stack alignment.
StackProbeSize &= ~(StackAlign - 1);
return StackProbeSize ? StackProbeSize : StackAlign;
}
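// For example, with a 16-byte stack alignment an attribute value of 4100
// is rounded down to 4096, and any value smaller than the alignment
// degenerates to StackAlign itself.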
// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
// into three phases. In the first phase, it uses the pseudo instruction
// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
// and FinalStackPtr. In the second phase, it generates a loop to probe the
// blocks. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the
// future result of MaxCallFrameSize so that it can compute the correct data
// area pointer.
MachineBasicBlock *
PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
MachineBasicBlock *MBB) const {
const bool isPPC64 = Subtarget.isPPC64();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
const unsigned ProbeSize = getStackProbeSize(*MF);
const BasicBlock *ProbedBB = MBB->getBasicBlock();
MachineRegisterInfo &MRI = MF->getRegInfo();
// The CFG for stack probing looks like:
// +-----+
// | MBB |
// +--+--+
// |
// +----v----+
// +--->+ TestMBB +---+
// | +----+----+ |
// | | |
// | +-----v----+ |
// +---+ BlockMBB | |
// +----------+ |
// |
// +---------+ |
// | TailMBB +<--+
// +---------+
// In MBB, calculate the previous frame pointer and the final stack pointer.
// In TestMBB, test whether sp equals the final stack pointer; if so, jump to
// TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
// TailMBB is spliced in via \p MI.
MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
MachineFunction::iterator MBBIter = ++MBB->getIterator();
MF->insert(MBBIter, TestMBB);
MF->insert(MBBIter, BlockMBB);
MF->insert(MBBIter, TailMBB);
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
Register DstReg = MI.getOperand(0).getReg();
Register NegSizeReg = MI.getOperand(1).getReg();
Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
// Since the value of NegSizeReg might be realigned during prologue/epilogue
// insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
// actual FramePointer and NegSize.
unsigned ProbeOpc;
if (!MRI.hasOneNonDBGUse(NegSizeReg))
ProbeOpc =
isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
else
// By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
// and NegSizeReg will be allocated in the same physical register, avoiding
// a redundant copy when NegSizeReg has only one use (the current MI, which
// PREPARE_PROBED_ALLOCA then replaces).
ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
: PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
.addDef(ActualNegSizeReg)
.addReg(NegSizeReg)
.add(MI.getOperand(2))
.add(MI.getOperand(3));
// Calculate the final stack pointer, which equals SP + ActualNegSize.
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
FinalStackPtr)
.addReg(SPReg)
.addReg(ActualNegSizeReg);
// Materialize a scratch register for update.
int64_t NegProbeSize = -(int64_t)ProbeSize;
assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
if (!isInt<16>(NegProbeSize)) {
Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
.addImm(NegProbeSize >> 16);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
ScratchReg)
.addReg(TempReg)
.addImm(NegProbeSize & 0xFFFF);
} else
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
.addImm(NegProbeSize);
{
// Probe the leading residual part.
Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
.addReg(ActualNegSizeReg)
.addReg(ScratchReg);
Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
.addReg(Div)
.addReg(ScratchReg);
Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
.addReg(Mul)
.addReg(ActualNegSizeReg);
BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
.addReg(FramePointer)
.addReg(SPReg)
.addReg(NegMod);
}
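// Worked example (sketch): allocating 10000 bytes with ProbeSize = 4096
// probes the 10000 % 4096 = 1808-byte residual here first; BlockMBB then
// performs two full 4096-byte probes.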
{
// The remaining part should be a multiple of ProbeSize.
Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
.addReg(SPReg)
.addReg(FinalStackPtr);
BuildMI(TestMBB, DL, TII->get(PPC::BCC))
.addImm(PPC::PRED_EQ)
.addReg(CmpResult)
.addMBB(TailMBB);
TestMBB->addSuccessor(BlockMBB);
TestMBB->addSuccessor(TailMBB);
}
{
// Touch the block.
// |P...|P...|P...
BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
.addReg(FramePointer)
.addReg(SPReg)
.addReg(ScratchReg);
BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
BlockMBB->addSuccessor(TestMBB);
}
// Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
// use the DYNAREAOFFSET pseudo instruction to get the future result.
Register MaxCallFrameSizeReg =
MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
BuildMI(TailMBB, DL,
TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
MaxCallFrameSizeReg)
.add(MI.getOperand(2))
.add(MI.getOperand(3));
BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
.addReg(SPReg)
.addReg(MaxCallFrameSizeReg);
// Splice instructions after MI to TailMBB.
TailMBB->splice(TailMBB->end(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
MBB->addSuccessor(TestMBB);
// Delete the pseudo instruction.
MI.eraseFromParent();
++NumDynamicAllocaProbed;
return TailMBB;
}
static bool IsSelectCC(MachineInstr &MI) {
switch (MI.getOpcode()) {
case PPC::SELECT_CC_I4:
case PPC::SELECT_CC_I8:
case PPC::SELECT_CC_F4:
case PPC::SELECT_CC_F8:
case PPC::SELECT_CC_F16:
case PPC::SELECT_CC_VRRC:
case PPC::SELECT_CC_VSFRC:
case PPC::SELECT_CC_VSSRC:
case PPC::SELECT_CC_VSRC:
case PPC::SELECT_CC_SPE4:
case PPC::SELECT_CC_SPE:
return true;
default:
return false;
}
}
static bool IsSelect(MachineInstr &MI) {
switch (MI.getOpcode()) {
case PPC::SELECT_I4:
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
case PPC::SELECT_F16:
case PPC::SELECT_SPE:
case PPC::SELECT_SPE4:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
case PPC::SELECT_VSSRC:
case PPC::SELECT_VSRC:
return true;
default:
return false;
}
}
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
if (Subtarget.is64BitELFABI() &&
MI.getOpcode() == TargetOpcode::PATCHPOINT &&
!Subtarget.isUsingPCRelativeCalls()) {
// Call lowering should have added an r2 operand to indicate a dependence
// on the TOC base pointer value. It can't, however, because there is no
// way to mark the dependence as implicit there, and so the stackmap code
// will confuse it with a regular operand. Instead, add the dependence
// here.
MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
}
return emitPatchPoint(MI, BB);
}
if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
return emitEHSjLjSetJmp(MI, BB);
} else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
return emitEHSjLjLongJmp(MI, BB);
}
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// To "insert" these instructions we actually have to insert their
// control-flow patterns.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
MachineFunction *F = BB->getParent();
MachineRegisterInfo &MRI = F->getRegInfo();
if (Subtarget.hasISEL() &&
(MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8 ||
MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
SmallVector<MachineOperand, 2> Cond;
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8)
Cond.push_back(MI.getOperand(4));
else
Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
Cond.push_back(MI.getOperand(1));
DebugLoc dl = MI.getDebugLoc();
TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
} else if (IsSelectCC(MI) || IsSelect(MI)) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
// thisMBB:
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
// bCC sinkMBB
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
DebugLoc dl = MI.getDebugLoc();
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// Set the call frame size on entry to the new basic blocks.
// See https://reviews.llvm.org/D156113.
unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
copy0MBB->setCallFrameSize(CallFrameSize);
sinkMBB->setCallFrameSize(CallFrameSize);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
if (IsSelect(MI)) {
BuildMI(BB, dl, TII->get(PPC::BC))
.addReg(MI.getOperand(1).getReg())
.addMBB(sinkMBB);
} else {
unsigned SelectPred = MI.getOperand(4).getImm();
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(SelectPred)
.addReg(MI.getOperand(1).getReg())
.addMBB(sinkMBB);
}
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
BB = copy0MBB;
// Update machine-CFG edges
BB->addSuccessor(sinkMBB);
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
.addReg(MI.getOperand(3).getReg())
.addMBB(copy0MBB)
.addReg(MI.getOperand(2).getReg())
.addMBB(thisMBB);
} else if (MI.getOpcode() == PPC::ReadTB) {
// To read the 64-bit time-base register on a 32-bit target, we read the
// two halves. Should the counter have wrapped while it was being read, we
// need to try again.
// ...
// readLoop:
// mfspr Rx,TBU # load from TBU
// mfspr Ry,TB # load from TB
// mfspr Rz,TBU # load from TBU
// cmpw crX,Rx,Rz # check if 'old'='new'
// bne readLoop # branch if they're not equal
// ...
MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
DebugLoc dl = MI.getDebugLoc();
F->insert(It, readMBB);
F->insert(It, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(readMBB);
BB = readMBB;
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
Register LoReg = MI.getOperand(0).getReg();
Register HiReg = MI.getOperand(1).getReg();
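// SPR 269 is TBU (upper half of the time base); SPR 268 is TB (lower half).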
BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
.addReg(HiReg)
.addReg(ReadAgainReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(CmpReg)
.addMBB(readMBB);
BB->addSuccessor(readMBB);
BB->addSuccessor(sinkMBB);
} else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
BB = EmitAtomicBinary(MI, BB, 8, 0);
else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
(Subtarget.hasPartwordAtomics() &&
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
(Subtarget.hasPartwordAtomics() &&
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
auto LoadMnemonic = PPC::LDARX;
auto StoreMnemonic = PPC::STDCX;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Compare and swap of unknown size");
case PPC::ATOMIC_CMP_SWAP_I8:
LoadMnemonic = PPC::LBARX;
StoreMnemonic = PPC::STBCX;
assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
break;
case PPC::ATOMIC_CMP_SWAP_I16:
LoadMnemonic = PPC::LHARX;
StoreMnemonic = PPC::STHCX;
assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
break;
case PPC::ATOMIC_CMP_SWAP_I32:
LoadMnemonic = PPC::LWARX;
StoreMnemonic = PPC::STWCX;
break;
case PPC::ATOMIC_CMP_SWAP_I64:
LoadMnemonic = PPC::LDARX;
StoreMnemonic = PPC::STDCX;
break;
}
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register dest = MI.getOperand(0).getReg();
Register ptrA = MI.getOperand(1).getReg();
Register ptrB = MI.getOperand(2).getReg();
Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
Register oldval = MI.getOperand(3).getReg();
Register newval = MI.getOperand(4).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loop1MBB);
F->insert(It, loop2MBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loop1MBB);
// loop1MBB:
// l[bhwd]arx dest, ptr
// cmp[wd] dest, oldval
// bne- exitBB
// loop2MBB:
// st[bhwd]cx. newval, ptr
// bne- loopMBB
// b exitBB
// exitBB:
BB = loop1MBB;
BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
.addReg(dest)
.addReg(oldval);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(CrReg)
.addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
BB = loop2MBB;
BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(newval)
.addReg(ptrA)
.addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(PPC::CR0)
.addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
} else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
// We must use 64-bit registers for addresses when targeting 64-bit,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
bool is64bit = Subtarget.isPPC64();
bool isLittleEndian = Subtarget.isLittleEndian();
bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
Register dest = MI.getOperand(0).getReg();
Register ptrA = MI.getOperand(1).getReg();
Register ptrB = MI.getOperand(2).getReg();
Register oldval = MI.getOperand(3).getReg();
Register newval = MI.getOperand(4).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loop1MBB);
F->insert(It, loop2MBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
const TargetRegisterClass *RC =
is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
Register PtrReg = RegInfo.createVirtualRegister(RC);
Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
Register ShiftReg =
isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
Register MaskReg = RegInfo.createVirtualRegister(GPRC);
Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
Register Ptr1Reg;
Register TmpReg = RegInfo.createVirtualRegister(GPRC);
Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loop1MBB);
// The 4-byte load must be aligned, while a char or short may be
// anywhere in the word. Hence all this nasty bookkeeping code.
// add ptr1, ptrA, ptrB [copy if ptrA==0]
// rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
// xori shift, shift1, 24 [16]
// rlwinm ptr, ptr1, 0, 0, 29
// slw newval2, newval, shift
// slw oldval2, oldval, shift
// li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
// slw mask, mask2, shift
// and newval3, newval2, mask
// and oldval3, oldval2, mask
// loop1MBB:
// lwarx tmpDest, ptr
// and tmp, tmpDest, mask
// cmpw tmp, oldval3
// bne- exitBB
// loop2MBB:
// andc tmp2, tmpDest, mask
// or tmp4, tmp2, newval3
// stwcx. tmp4, ptr
// bne- loop1MBB
// b exitBB
// exitBB:
// srw dest, tmpDest, shift
if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
.addReg(ptrA)
.addReg(ptrB);
} else {
Ptr1Reg = ptrB;
}
// We need to use a 32-bit subregister to avoid a register class mismatch in
// 64-bit mode.
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
.addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
.addImm(3)
.addImm(27)
.addImm(is8bit ? 28 : 27);
if (!isLittleEndian)
BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
.addReg(Shift1Reg)
.addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg)
.addImm(0)
.addImm(61);
else
BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
.addReg(Ptr1Reg)
.addImm(0)
.addImm(0)
.addImm(29);
BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
.addReg(newval)
.addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
.addReg(oldval)
.addReg(ShiftReg);
if (is8bit)
BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
else {
BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
.addReg(Mask3Reg)
.addImm(65535);
}
BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
.addReg(Mask2Reg)
.addReg(ShiftReg);
BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
.addReg(NewVal2Reg)
.addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
.addReg(OldVal2Reg)
.addReg(MaskReg);
BB = loop1MBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
.addReg(ZeroReg)
.addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
.addReg(TmpDestReg)
.addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
.addReg(TmpReg)
.addReg(OldVal3Reg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(CrReg)
.addMBB(exitMBB);
BB->addSuccessor(loop2MBB);
BB->addSuccessor(exitMBB);
BB = loop2MBB;
BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
.addReg(TmpDestReg)
.addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
.addReg(Tmp2Reg)
.addReg(NewVal3Reg);
BuildMI(BB, dl, TII->get(PPC::STWCX))
.addReg(Tmp4Reg)
.addReg(ZeroReg)
.addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(PPC::CR0)
.addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
.addReg(TmpReg)
.addReg(ShiftReg);
} else if (MI.getOpcode() == PPC::FADDrtz) {
// This pseudo performs an FADD with rounding mode temporarily forced
// to round-to-zero. We emit this via custom inserter since the FPSCR
// is not modeled at the SelectionDAG level.
Register Dest = MI.getOperand(0).getReg();
Register Src1 = MI.getOperand(1).getReg();
Register Src2 = MI.getOperand(2).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
// Save FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
// Set rounding mode to round-to-zero.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
.addImm(31)
.addReg(PPC::RM, RegState::ImplicitDefine);
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
.addImm(30)
.addReg(PPC::RM, RegState::ImplicitDefine);
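// With bit 31 set and bit 30 cleared, the FPSCR RN field is 0b01, i.e.
// round toward zero.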
// Perform addition.
auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
.addReg(Src1)
.addReg(Src2);
if (MI.getFlag(MachineInstr::NoFPExcept))
MIB.setMIFlag(MachineInstr::NoFPExcept);
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
} else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
? PPC::ANDI8_rec
: PPC::ANDI_rec;
bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register Dest = RegInfo.createVirtualRegister(
Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
DebugLoc Dl = MI.getDebugLoc();
BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
.addReg(MI.getOperand(1).getReg())
.addImm(1);
BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
MI.getOperand(0).getReg())
.addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
} else if (MI.getOpcode() == PPC::TCHECK_RET) {
DebugLoc Dl = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
MI.getOperand(0).getReg())
.addReg(CRReg);
} else if (MI.getOpcode() == PPC::TBEGIN_RET) {
DebugLoc Dl = MI.getDebugLoc();
unsigned Imm = MI.getOperand(1).getImm();
BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
MI.getOperand(0).getReg())
.addReg(PPC::CR0EQ);
} else if (MI.getOpcode() == PPC::SETRNDi) {
DebugLoc dl = MI.getDebugLoc();
Register OldFPSCRReg = MI.getOperand(0).getReg();
// Save FPSCR value.
if (MRI.use_empty(OldFPSCRReg))
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
else
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
// The floating-point rounding mode is in bits 62:63 of the FPSCR and has
// the following settings:
// 00 Round to nearest
// 01 Round to 0
// 10 Round to +inf
// 11 Round to -inf
// When the operand is an immediate, use its two least significant bits to
// set bits 62:63 of the FPSCR.
unsigned Mode = MI.getOperand(1).getImm();
BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
.addImm(31)
.addReg(PPC::RM, RegState::ImplicitDefine);
BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
.addImm(30)
.addReg(PPC::RM, RegState::ImplicitDefine);
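// For example, SETRNDi 3 emits mtfsb1 31 followed by mtfsb1 30, leaving
// RN = 0b11 (round to -inf).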
} else if (MI.getOpcode() == PPC::SETRND) {
DebugLoc dl = MI.getDebugLoc();
// Copy a register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
// or from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
// If the target doesn't have DirectMove, we have to go through the stack,
// because the target lacks instructions like mtvsrd or mfvsrd that could
// do this conversion directly.
auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
if (Subtarget.hasDirectMove()) {
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
.addReg(SrcReg);
} else {
// Use stack to do the register copy.
unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
MachineRegisterInfo &RegInfo = F->getRegInfo();
const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
if (RC == &PPC::F8RCRegClass) {
// Copy register from F8RCRegClass to G8RCRegClass.
assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
"Unsupported RegClass.");
StoreOp = PPC::STFD;
LoadOp = PPC::LD;
} else {
// Copy register from G8RCRegClass to F8RCRegClass.
assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
(RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
"Unsupported RegClass.");
}
MachineFrameInfo &MFI = F->getFrameInfo();
int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
MachineMemOperand *MMOStore = F->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
MFI.getObjectAlign(FrameIdx));
// Store the SrcReg into the stack.
BuildMI(*BB, MI, dl, TII->get(StoreOp))
.addReg(SrcReg)
.addImm(0)
.addFrameIndex(FrameIdx)
.addMemOperand(MMOStore);
MachineMemOperand *MMOLoad = F->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
MFI.getObjectAlign(FrameIdx));
// Load from the stack slot where SrcReg was stored into DestReg,
// completing the register-class conversion from the class of SrcReg to
// the class of DestReg.
BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
.addImm(0)
.addFrameIndex(FrameIdx)
.addMemOperand(MMOLoad);
}
};
Register OldFPSCRReg = MI.getOperand(0).getReg();
// Save FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
// When the operand is a gprc register, use its two least significant bits
// together with the mtfsf instruction to set bits 62:63 of the FPSCR.
//
// copy OldFPSCRTmpReg, OldFPSCRReg
// (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
// rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
// copy NewFPSCRReg, NewFPSCRTmpReg
// mtfsf 255, NewFPSCRReg
MachineOperand SrcOp = MI.getOperand(1);
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
// The first operand of INSERT_SUBREG should be a register that has
// subregisters. We only care about its RegClass, so we use an
// IMPLICIT_DEF register.
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
.addReg(ImDefReg)
.add(SrcOp)
.addImm(1);
Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
.addReg(OldFPSCRTmpReg)
.addReg(ExtSrcReg)
.addImm(0)
.addImm(62);
Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
// The mask 255 means that bits 32:63 of NewFPSCRReg are placed into bits
// 32:63 of the FPSCR.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
.addImm(255)
.addReg(NewFPSCRReg)
.addImm(0)
.addImm(0);
} else if (MI.getOpcode() == PPC::SETFLM) {
DebugLoc Dl = MI.getDebugLoc();
// Result of setflm is previous FPSCR content, so we need to save it first.
Register OldFPSCRReg = MI.getOperand(0).getReg();
if (MRI.use_empty(OldFPSCRReg))
BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
else
BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
// Put bits in 32:63 to FPSCR.
Register NewFPSCRReg = MI.getOperand(1).getReg();
BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
.addImm(255)
.addReg(NewFPSCRReg)
.addImm(0)
.addImm(0);
} else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
return emitProbedAlloca(MI, BB);
} else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
DebugLoc DL = MI.getDebugLoc();
Register Src = MI.getOperand(2).getReg();
Register Lo = MI.getOperand(0).getReg();
Register Hi = MI.getOperand(1).getReg();
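// Split the quadword held in the register pair into its two doublewords:
// Hi comes from the sub_gp8_x0 subregister and Lo from sub_gp8_x1.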
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
.addDef(Lo)
.addUse(Src, 0, PPC::sub_gp8_x1);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
.addDef(Hi)
.addUse(Src, 0, PPC::sub_gp8_x0);
} else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
MI.getOpcode() == PPC::STQX_PSEUDO) {
DebugLoc DL = MI.getDebugLoc();
// Ptr holds the sum of RA and RB and serves as the ptr_rc_no_r0 part of
// LQ/STQ's memory operand, so it has to be in g8rc_and_g8rc_nox0.
Register Ptr =
F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
Register Val = MI.getOperand(0).getReg();
Register RA = MI.getOperand(1).getReg();
Register RB = MI.getOperand(2).getReg();
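// LQ and STQ only have a DQ-form (base + displacement) encoding, so the
// indexed pseudo is expanded to: add8 Ptr, RA, RB ; lq/stq Val, 0(Ptr).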
BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
BuildMI(*BB, MI, DL,
MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
: TII->get(PPC::STQ))
.addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
.addImm(0)
.addReg(Ptr);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
// For the estimates, convergence is quadratic, so we essentially double the
// number of digits correct after every iteration. For both FRE and FRSQRTE,
// the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
// this is 2^-14. IEEE float has 23 digits and double has 52 digits.
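// For example, starting from 2^-5 accuracy: three iterations reach 2^-40
// (enough for f32's 23 digits) and a fourth reaches 2^-80 (enough for f64's
// 52 digits); with hasRecipPrec(), one iteration reaches 2^-28 and a second
// reaches 2^-56.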
int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
RefinementSteps++;
return RefinementSteps;
}
SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
// We only have VSX Vector Test for software Square Root.
EVT VT = Op.getValueType();
if (!isTypeLegal(MVT::i1) ||
(VT != MVT::f64 &&
((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
SDLoc DL(Op);
// The output register of FTSQRT is a CR field.
SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
// ftsqrt BF,FRB
// Let e_b be the unbiased exponent of the double-precision
// floating-point operand in register FRB.
// fe_flag is set to 1 if either of the following conditions occurs.
// - The double-precision floating-point operand in register FRB is a zero,
// a NaN, an infinity, or a negative value.
// - e_b is less than or equal to -970.
// Otherwise fe_flag is set to 0.
// Both the VSX and non-VSX versions set the EQ bit in the CR if the number
// is not eligible for iteration (zero/negative/infinity/NaN, or the
// unbiased exponent is less than or equal to -970).
SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
FTSQRT, SRIdxVal),
0);
}
SDValue
PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
SelectionDAG &DAG) const {
// We only have VSX Vector Square Root.
EVT VT = Op.getValueType();
if (VT != MVT::f64 &&
((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
}
SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
int Enabled, int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
// The Newton-Raphson computation with a single constant does not provide
// enough accuracy on some CPUs.
UseOneConstNR = !Subtarget.needsTwoConstNR();
return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
int Enabled,
int &RefinementSteps) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
// Note: This functionality is used only when unsafe-fp-math is enabled, and
// on cores with reciprocal estimates (which are used when unsafe-fp-math is
// enabled for division), this functionality is redundant with the default
// combiner logic (once the division -> reciprocal/multiply transformation
// has taken place). As a result, this matters more for older cores than for
// newer ones.
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are two or more FDIVs (for embedded cores with only
// one FP pipeline) or three or more FDIVs (for generic OOO cores).
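// For example, once the returned threshold is met:
//   a/d ; b/d ; c/d  ==>  r = 1.0/d ; a*r ; b*r ; c*r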
switch (Subtarget.getCPUDirective()) {
default:
return 3;
case PPC::DIR_440:
case PPC::DIR_A2:
case PPC::DIR_E500:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
return 2;
}
}
// isConsecutiveLSLoc needs to work even if all adds have not yet been
// collapsed, and so we need to look through chains of them.
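// For example, (add (add %x, 16), 8) yields Base = %x and Offset = 24.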
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
int64_t& Offset, SelectionDAG &DAG) {
if (DAG.isBaseWithConstantOffset(Loc)) {
Base = Loc.getOperand(0);
Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
// The base might itself be a base plus an offset, and if so, accumulate
// that as well.
getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
}
}
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
unsigned Bytes, int Dist,
SelectionDAG &DAG) {
if (VT.getSizeInBits() / 8 != Bytes)
return false;
SDValue BaseLoc = Base->getBasePtr();
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
return false;
const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
int FS = MFI.getObjectSize(FI);
int BFS = MFI.getObjectSize(BFI);
if (FS != BFS || FS != (int)Bytes) return false;
return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
}
SDValue Base1 = Loc, Base2 = BaseLoc;
int64_t Offset1 = 0, Offset2 = 0;
getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
return true;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const GlobalValue *GV1 = nullptr;
const GlobalValue *GV2 = nullptr;
Offset1 = 0;
Offset2 = 0;
bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
if (isGA1 && isGA2 && GV1 == GV2)
return Offset1 == (Offset2 + Dist*Bytes);
return false;
}
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
unsigned Bytes, int Dist,
SelectionDAG &DAG) {
if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
EVT VT = LS->getMemoryVT();
SDValue Loc = LS->getBasePtr();
return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
}
if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
EVT VT;
switch (N->getConstantOperandVal(1)) {
default: return false;
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_vsx_lxvw4x:
case Intrinsic::ppc_vsx_lxvw4x_be:
VT = MVT::v4i32;
break;
case Intrinsic::ppc_vsx_lxvd2x:
case Intrinsic::ppc_vsx_lxvd2x_be:
VT = MVT::v2f64;
break;
case Intrinsic::ppc_altivec_lvebx:
VT = MVT::i8;
break;
case Intrinsic::ppc_altivec_lvehx:
VT = MVT::i16;
break;
case Intrinsic::ppc_altivec_lvewx:
VT = MVT::i32;
break;
}
return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
}
if (N->getOpcode() == ISD::INTRINSIC_VOID) {
EVT VT;
switch (N->getConstantOperandVal(1)) {
default: return false;
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_vsx_stxvw4x:
VT = MVT::v4i32;
break;
case Intrinsic::ppc_vsx_stxvd2x:
VT = MVT::v2f64;
break;
case Intrinsic::ppc_vsx_stxvw4x_be:
VT = MVT::v4i32;
break;
case Intrinsic::ppc_vsx_stxvd2x_be:
VT = MVT::v2f64;
break;
case Intrinsic::ppc_altivec_stvebx:
VT = MVT::i8;
break;
case Intrinsic::ppc_altivec_stvehx:
VT = MVT::i16;
break;
case Intrinsic::ppc_altivec_stvewx:
VT = MVT::i32;
break;
}
return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
}
return false;
}
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
SDValue Chain = LD->getChain();
EVT VT = LD->getMemoryVT();
SmallSet<SDNode *, 16> LoadRoots;
SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
SmallSet<SDNode *, 16> Visited;
// First, search up the chain, branching to follow all token-factor operands.
// If we find a consecutive load, then we're done, otherwise, record all
// nodes just above the top-level loads and token factors.
while (!Queue.empty()) {
SDNode *ChainNext = Queue.pop_back_val();
if (!Visited.insert(ChainNext).second)
continue;
if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
if (!Visited.count(ChainLD->getChain().getNode()))
Queue.push_back(ChainLD->getChain().getNode());
} else if (ChainNext->getOpcode() == ISD::TokenFactor) {
for (const SDUse &O : ChainNext->ops())
if (!Visited.count(O.getNode()))
Queue.push_back(O.getNode());
} else
LoadRoots.insert(ChainNext);
}
// Second, search down the chain, starting from the top-level nodes recorded
// in the first phase. These top-level nodes are the nodes just above all
// loads and token factors. Starting with their uses, recursively look through
// all loads (just the chain uses) and token factors to find a consecutive
// load.
Visited.clear();
Queue.clear();
for (SDNode *I : LoadRoots) {
Queue.push_back(I);
while (!Queue.empty()) {
SDNode *LoadRoot = Queue.pop_back_val();
if (!Visited.insert(LoadRoot).second)
continue;
if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
for (SDNode *U : LoadRoot->uses())
if (((isa<MemSDNode>(U) &&
cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
U->getOpcode() == ISD::TokenFactor) &&
!Visited.count(U))
Queue.push_back(U);
}
}
return false;
}
/// This function is called when we have proved that a SETCC node can be
/// replaced by subtraction (and other supporting instructions) so that the
/// result of the comparison is kept in a GPR instead of a CR. This function is
/// purely for codegen purposes and has some flags to guide the codegen
/// process.
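/// For example, for a 32-bit unsigned a < b (SETULT) after legalization,
/// where Size == 64:
///   sub = zext64(a) - zext64(b)
/// bit 63 of sub is 1 exactly when a <u b, so (srl sub, 63) yields the i1
/// result; the operands are swapped for SETULE/SETUGT and the low bit is
/// complemented for SETULE/SETUGE.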
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
bool Swap, SDLoc &DL, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
// Zero extend the operands to the largest legal integer; the original
// operands must be of a strictly smaller size.
auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
DAG.getConstant(Size, DL, MVT::i32));
auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
DAG.getConstant(Size, DL, MVT::i32));
// Swap if needed, depending on the condition code.
if (Swap)
std::swap(Op0, Op1);
// Subtract extended integers.
auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
// Move the sign bit to the least significant position and zero out the rest.
// Now the least significant bit carries the result of the original comparison.
auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
DAG.getConstant(Size - 1, DL, MVT::i32));
auto Final = Shifted;
// Complement the result if needed, based on the condition code.
if (Complement)
Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
DAG.getConstant(1, DL, MVT::i64));
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
}
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
// The size of the integers being compared plays a critical role in the
// following analysis, so we prefer to do this when all types are legal.
if (!DCI.isAfterLegalizeDAG())
return SDValue();
// If all users of the SETCC extend its value to a legal integer type,
// then we replace the SETCC with a subtraction.
for (const SDNode *U : N->uses())
if (U->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
auto OpSize = N->getOperand(0).getValueSizeInBits();
unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
if (OpSize < Size) {
switch (CC) {
default: break;
case ISD::SETULT:
return generateEquivalentSub(N, Size, false, false, DL, DAG);
case ISD::SETULE:
return generateEquivalentSub(N, Size, true, true, DL, DAG);
case ISD::SETUGT:
return generateEquivalentSub(N, Size, false, true, DL, DAG);
case ISD::SETUGE:
return generateEquivalentSub(N, Size, true, false, DL, DAG);
}
}
return SDValue();
}
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
// If we're tracking CR bits, we need to be careful that we don't have:
// trunc(binary-ops(zext(x), zext(y)))
// or
// trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
// such that we're unnecessarily moving things into GPRs when it would be
// better to keep them in CR bits.
// Note that trunc here can be an actual i1 trunc, or can be the effective
// truncation that comes from a setcc or select_cc.
if (N->getOpcode() == ISD::TRUNCATE &&
N->getValueType(0) != MVT::i1)
return SDValue();
if (N->getOperand(0).getValueType() != MVT::i32 &&
N->getOperand(0).getValueType() != MVT::i64)
return SDValue();
if (N->getOpcode() == ISD::SETCC ||
N->getOpcode() == ISD::SELECT_CC) {
// If we're looking at a comparison, then we need to make sure that the
// high bits (all except for the first) don't affect the result.
ISD::CondCode CC =
cast<CondCodeSDNode>(N->getOperand(
N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
unsigned OpBits = N->getOperand(0).getValueSizeInBits();
if (ISD::isSignedIntSetCC(CC)) {
if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
return SDValue();
} else if (ISD::isUnsignedIntSetCC(CC)) {
if (!DAG.MaskedValueIsZero(N->getOperand(0),
APInt::getHighBitsSet(OpBits, OpBits-1)) ||
!DAG.MaskedValueIsZero(N->getOperand(1),
APInt::getHighBitsSet(OpBits, OpBits-1)))
return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
: SDValue());
} else {
// This is neither a signed nor an unsigned comparison; just make sure
// that the high bits are equal.
KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
// We don't really care about what is known about the first bit (if
// anything), so pretend that it is known zero for both to ensure they can
// be compared as constants.
Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
Op1Known.getConstant() != Op2Known.getConstant())
return SDValue();
}
}
// We now know that the higher-order bits are irrelevant, we just need to
// make sure that all of the intermediate operations are bit operations, and
// all inputs are extensions.
if (N->getOperand(0).getOpcode() != ISD::AND &&
N->getOperand(0).getOpcode() != ISD::OR &&
N->getOperand(0).getOpcode() != ISD::XOR &&
N->getOperand(0).getOpcode() != ISD::SELECT &&
N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
return SDValue();
if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
N->getOperand(1).getOpcode() != ISD::AND &&
N->getOperand(1).getOpcode() != ISD::OR &&
N->getOperand(1).getOpcode() != ISD::XOR &&
N->getOperand(1).getOpcode() != ISD::SELECT &&
N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
return SDValue();
SmallVector<SDValue, 4> Inputs;
SmallVector<SDValue, 8> BinOps, PromOps;
SmallPtrSet<SDNode *, 16> Visited;
for (unsigned i = 0; i < 2; ++i) {
if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
isa<ConstantSDNode>(N->getOperand(i)))
Inputs.push_back(N->getOperand(i));
else
BinOps.push_back(N->getOperand(i));
if (N->getOpcode() == ISD::TRUNCATE)
break;
}
// Visit all inputs, collect all binary operations (and, or, xor and
// select) that are all fed by extensions.
while (!BinOps.empty()) {
SDValue BinOp = BinOps.pop_back_val();
if (!Visited.insert(BinOp.getNode()).second)
continue;
PromOps.push_back(BinOp);
for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
// The condition of the select is not promoted.
if (BinOp.getOpcode() == ISD::SELECT && i == 0)
continue;
if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
continue;
if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
isa<ConstantSDNode>(BinOp.getOperand(i))) {
Inputs.push_back(BinOp.getOperand(i));
} else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
BinOp.getOperand(i).getOpcode() == ISD::OR ||
BinOp.getOperand(i).getOpcode() == ISD::XOR ||
BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
BinOps.push_back(BinOp.getOperand(i));
} else {
// We have an input that is not an extension or another binary
// operation; we'll abort this transformation.
return SDValue();
}
}
}
// Make sure that this is a self-contained cluster of operations (which
// is not quite the same thing as saying that everything has only one
// use).
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
if (isa<ConstantSDNode>(Inputs[i]))
continue;
for (const SDNode *User : Inputs[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
// Make sure that we're not going to promote the non-output-value
// operand(s) or SELECT or SELECT_CC.
// FIXME: Although we could sometimes handle this, and it does occur in
// practice that one of the condition inputs to the select is also one of
// the outputs, we currently can't deal with this.
if (User->getOpcode() == ISD::SELECT) {
if (User->getOperand(0) == Inputs[i])
return SDValue();
} else if (User->getOpcode() == ISD::SELECT_CC) {
if (User->getOperand(0) == Inputs[i] ||
User->getOperand(1) == Inputs[i])
return SDValue();
}
}
}
for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
for (const SDNode *User : PromOps[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
// Make sure that we're not going to promote the non-output-value
// operand(s) or SELECT or SELECT_CC.
// FIXME: Although we could sometimes handle this, and it does occur in
// practice that one of the condition inputs to the select is also one of
// the outputs, we currently can't deal with this.
if (User->getOpcode() == ISD::SELECT) {
if (User->getOperand(0) == PromOps[i])
return SDValue();
} else if (User->getOpcode() == ISD::SELECT_CC) {
if (User->getOperand(0) == PromOps[i] ||
User->getOperand(1) == PromOps[i])
return SDValue();
}
}
}
// Replace all inputs with the extension operand.
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
// Constants may have users outside the cluster of to-be-promoted nodes,
// and so we need to replace those as we do the promotions.
if (isa<ConstantSDNode>(Inputs[i]))
continue;
DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
}
std::list<HandleSDNode> PromOpHandles;
for (auto &PromOp : PromOps)
PromOpHandles.emplace_back(PromOp);
// Replace all operations (these are all the same, but have a different
// (i1) return type). DAG.getNode will validate that the types of
// a binary operator match, so go through the list in reverse so that
// we've likely promoted both operands first. Any intermediate truncations or
// extensions disappear.
while (!PromOpHandles.empty()) {
SDValue PromOp = PromOpHandles.back().getValue();
PromOpHandles.pop_back();
if (PromOp.getOpcode() == ISD::TRUNCATE ||
PromOp.getOpcode() == ISD::SIGN_EXTEND ||
PromOp.getOpcode() == ISD::ZERO_EXTEND ||
PromOp.getOpcode() == ISD::ANY_EXTEND) {
if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
PromOp.getOperand(0).getValueType() != MVT::i1) {
// The operand is not yet ready (see comment below).
PromOpHandles.emplace_front(PromOp);
continue;
}
SDValue RepValue = PromOp.getOperand(0);
if (isa<ConstantSDNode>(RepValue))
RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
continue;
}
unsigned C;
switch (PromOp.getOpcode()) {
default: C = 0; break;
case ISD::SELECT: C = 1; break;
case ISD::SELECT_CC: C = 2; break;
}
if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
PromOp.getOperand(C).getValueType() != MVT::i1) ||
(!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
// The to-be-promoted operands of this node have not yet been
// promoted (this should be rare because we're going through the
// list backward, but if one of the operands has several users in
// this cluster of to-be-promoted nodes, it is possible).
PromOpHandles.emplace_front(PromOp);
continue;
}
SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
PromOp.getNode()->op_end());
// If there are any constant inputs, make sure they're replaced now.
for (unsigned i = 0; i < 2; ++i)
if (isa<ConstantSDNode>(Ops[C+i]))
Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
DAG.ReplaceAllUsesOfValueWith(PromOp,
DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
}
// Now we're left with the initial truncation itself.
if (N->getOpcode() == ISD::TRUNCATE)
return N->getOperand(0);
// Otherwise, this is a comparison. The operands to be compared have just
// changed type (to i1), but everything else is the same.
return SDValue(N, 0);
}
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
// If we're tracking CR bits, we need to be careful that we don't have:
// zext(binary-ops(trunc(x), trunc(y)))
// or
// zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
// such that we're unnecessarily moving things into CR bits that can more
// efficiently stay in GPRs. Note that if we're not certain that the high
// bits are set as required by the final extension, we still may need to do
// some masking to get the proper behavior.
// This same functionality is important on PPC64 when dealing with
// 32-to-64-bit extensions; these occur often when 32-bit values are used as
// the return values of functions. Because it is so similar, it is handled
// here as well.
if (N->getValueType(0) != MVT::i32 &&
N->getValueType(0) != MVT::i64)
return SDValue();
if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
(N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
return SDValue();
if (N->getOperand(0).getOpcode() != ISD::AND &&
N->getOperand(0).getOpcode() != ISD::OR &&
N->getOperand(0).getOpcode() != ISD::XOR &&
N->getOperand(0).getOpcode() != ISD::SELECT &&
N->getOperand(0).getOpcode() != ISD::SELECT_CC)
return SDValue();
SmallVector<SDValue, 4> Inputs;
SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
SmallPtrSet<SDNode *, 16> Visited;
// Visit all inputs, collect all binary operations (and, or, xor and
// select) that are all fed by truncations.
while (!BinOps.empty()) {
SDValue BinOp = BinOps.pop_back_val();
if (!Visited.insert(BinOp.getNode()).second)
continue;
PromOps.push_back(BinOp);
for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
// The condition of the select is not promoted.
if (BinOp.getOpcode() == ISD::SELECT && i == 0)
continue;
if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
continue;
if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
isa<ConstantSDNode>(BinOp.getOperand(i))) {
Inputs.push_back(BinOp.getOperand(i));
} else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
BinOp.getOperand(i).getOpcode() == ISD::OR ||
BinOp.getOperand(i).getOpcode() == ISD::XOR ||
BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
BinOps.push_back(BinOp.getOperand(i));
} else {
// We have an input that is not a truncation or another binary
// operation; we'll abort this transformation.
return SDValue();
}
}
}
// The operands of a select that must be truncated when the select is
// promoted, because each such operand is actually part of the
// to-be-promoted set.
DenseMap<SDNode *, EVT> SelectTruncOp[2];
// Make sure that this is a self-contained cluster of operations (which
// is not quite the same thing as saying that everything has only one
// use).
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
if (isa<ConstantSDNode>(Inputs[i]))
continue;
for (SDNode *User : Inputs[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
// If we're going to promote the non-output-value operand(s) or SELECT or
// SELECT_CC, record them for truncation.
if (User->getOpcode() == ISD::SELECT) {
if (User->getOperand(0) == Inputs[i])
SelectTruncOp[0].insert(std::make_pair(User,
User->getOperand(0).getValueType()));
} else if (User->getOpcode() == ISD::SELECT_CC) {
if (User->getOperand(0) == Inputs[i])
SelectTruncOp[0].insert(std::make_pair(User,
User->getOperand(0).getValueType()));
if (User->getOperand(1) == Inputs[i])
SelectTruncOp[1].insert(std::make_pair(User,
User->getOperand(1).getValueType()));
}
}
}
for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
for (SDNode *User : PromOps[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
// If we're going to promote the non-output-value operand(s) or SELECT or
// SELECT_CC, record them for truncation.
if (User->getOpcode() == ISD::SELECT) {
if (User->getOperand(0) == PromOps[i])
SelectTruncOp[0].insert(std::make_pair(User,
User->getOperand(0).getValueType()));
} else if (User->getOpcode() == ISD::SELECT_CC) {
if (User->getOperand(0) == PromOps[i])
SelectTruncOp[0].insert(std::make_pair(User,
User->getOperand(0).getValueType()));
if (User->getOperand(1) == PromOps[i])
SelectTruncOp[1].insert(std::make_pair(User,
User->getOperand(1).getValueType()));
}
}
}
unsigned PromBits = N->getOperand(0).getValueSizeInBits();
bool ReallyNeedsExt = false;
if (N->getOpcode() != ISD::ANY_EXTEND) {
// If not all of the inputs are already sign/zero extended, then
// we'll still need to do that at the end.
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
if (isa<ConstantSDNode>(Inputs[i]))
continue;
unsigned OpBits =
Inputs[i].getOperand(0).getValueSizeInBits();
assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
if ((N->getOpcode() == ISD::ZERO_EXTEND &&
!DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
APInt::getHighBitsSet(OpBits,
OpBits-PromBits))) ||
(N->getOpcode() == ISD::SIGN_EXTEND &&
DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
(OpBits-(PromBits-1)))) {
ReallyNeedsExt = true;
break;
}
}
}
// Replace all inputs, either with the truncation operand, or a
// truncation or extension to the final output type.
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
// Constant inputs need to be replaced with the to-be-promoted nodes that
// use them because they might have users outside of the cluster of
// promoted nodes.
if (isa<ConstantSDNode>(Inputs[i]))
continue;
SDValue InSrc = Inputs[i].getOperand(0);
if (Inputs[i].getValueType() == N->getValueType(0))
DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
else if (N->getOpcode() == ISD::SIGN_EXTEND)
DAG.ReplaceAllUsesOfValueWith(Inputs[i],
DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
else if (N->getOpcode() == ISD::ZERO_EXTEND)
DAG.ReplaceAllUsesOfValueWith(Inputs[i],
DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
else
DAG.ReplaceAllUsesOfValueWith(Inputs[i],
DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
}
std::list<HandleSDNode> PromOpHandles;
for (auto &PromOp : PromOps)
PromOpHandles.emplace_back(PromOp);
// Replace all operations (these are all the same, but have a different
// (promoted) return type). DAG.getNode will validate that the types of
// a binary operator match, so go through the list in reverse so that
// we've likely promoted both operands first.
while (!PromOpHandles.empty()) {
SDValue PromOp = PromOpHandles.back().getValue();
PromOpHandles.pop_back();
unsigned C;
switch (PromOp.getOpcode()) {
default: C = 0; break;
case ISD::SELECT: C = 1; break;
case ISD::SELECT_CC: C = 2; break;
}
if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
(!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
// The to-be-promoted operands of this node have not yet been
// promoted (this should be rare because we're going through the
// list backward, but if one of the operands has several users in
// this cluster of to-be-promoted nodes, it is possible).
PromOpHandles.emplace_front(PromOp);
continue;
}
// For SELECT and SELECT_CC nodes, we do a similar check for any
// to-be-promoted comparison inputs.
if (PromOp.getOpcode() == ISD::SELECT ||
PromOp.getOpcode() == ISD::SELECT_CC) {
if ((SelectTruncOp[0].count(PromOp.getNode()) &&
PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
(SelectTruncOp[1].count(PromOp.getNode()) &&
PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
PromOpHandles.emplace_front(PromOp);
continue;
}
}
SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
PromOp.getNode()->op_end());
// If this node has constant inputs, then they'll need to be promoted here.
for (unsigned i = 0; i < 2; ++i) {
if (!isa<ConstantSDNode>(Ops[C+i]))
continue;
if (Ops[C+i].getValueType() == N->getValueType(0))
continue;
if (N->getOpcode() == ISD::SIGN_EXTEND)
Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
else if (N->getOpcode() == ISD::ZERO_EXTEND)
Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
else
Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
}
// If we've promoted the comparison inputs of a SELECT or SELECT_CC,
// truncate them again to the original value type.
if (PromOp.getOpcode() == ISD::SELECT ||
PromOp.getOpcode() == ISD::SELECT_CC) {
auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
if (SI0 != SelectTruncOp[0].end())
Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
if (SI1 != SelectTruncOp[1].end())
Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
}
DAG.ReplaceAllUsesOfValueWith(PromOp,
DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
}
// Now we're left with the initial extension itself.
if (!ReallyNeedsExt)
return N->getOperand(0);
// To zero extend, just mask off everything except for the first bit (in the
// i1 case).
if (N->getOpcode() == ISD::ZERO_EXTEND)
return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
DAG.getConstant(APInt::getLowBitsSet(
N->getValueSizeInBits(0), PromBits),
dl, N->getValueType(0)));
assert(N->getOpcode() == ISD::SIGN_EXTEND &&
"Invalid extension type");
EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
SDValue ShiftCst =
DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
return DAG.getNode(
ISD::SRA, dl, N->getValueType(0),
DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
ShiftCst);
}
SDValue PPCTargetLowering::combineSetCC(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::SETCC &&
"Should be called with a SETCC node");
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (CC == ISD::SETNE || CC == ISD::SETEQ) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
LHS.hasOneUse())
std::swap(LHS, RHS);
// x == 0-y --> x+y == 0
// x != 0-y --> x+y != 0
if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
RHS.hasOneUse()) {
SDLoc DL(N);
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
EVT OpVT = LHS.getValueType();
SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
}
return DAGCombineTruncBoolExt(N, DCI);
}
// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
return LD->getExtensionType() == ISD::EXTLOAD &&
Op.getValueType() == MVT::f64;
return false;
}
/// Reduces the number of fp-to-int conversion when building a vector.
///
/// If this vector is built out of floating to integer conversions,
/// transform it to a vector built out of floating point values followed by a
/// single floating to integer conversion of the vector.
/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
/// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
"Should be called with a BUILD_VECTOR node");
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
SDValue FirstInput = N->getOperand(0);
assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
"The input operand must be an fp-to-int conversion.");
// This combine happens after legalization, so the fp_to_[su]i nodes have
// already been converted to PPCISD nodes.
unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
if (FirstConversion == PPCISD::FCTIDZ ||
FirstConversion == PPCISD::FCTIDUZ ||
FirstConversion == PPCISD::FCTIWZ ||
FirstConversion == PPCISD::FCTIWUZ) {
bool IsSplat = true;
bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
FirstConversion == PPCISD::FCTIWUZ;
EVT SrcVT = FirstInput.getOperand(0).getValueType();
SmallVector<SDValue, 4> Ops;
EVT TargetVT = N->getValueType(0);
for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
SDValue NextOp = N->getOperand(i);
if (NextOp.getOpcode() != PPCISD::MFVSR)
return SDValue();
unsigned NextConversion = NextOp.getOperand(0).getOpcode();
if (NextConversion != FirstConversion)
return SDValue();
// If we are converting to 32-bit integers, we need to add an FP_ROUND.
// This is not valid if the input was originally double precision. It is
// also not profitable to do unless this is an extending load, in which
// case doing this combine will allow us to combine consecutive loads.
if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
return SDValue();
if (N->getOperand(i) != FirstInput)
IsSplat = false;
}
// If this is a splat, we leave it as-is since there will be only a single
// fp-to-int conversion followed by a splat of the integer. This is better
// for 32-bit and smaller ints and neutral for 64-bit ints.
if (IsSplat)
return SDValue();
// Now that we know we have the right type of node, get its operands
for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
SDValue In = N->getOperand(i).getOperand(0);
if (Is32Bit) {
// For 32-bit values, we need to add an FP_ROUND node (if we made it
// here, we know that all inputs are extending loads so this is safe).
if (In.isUndef())
Ops.push_back(DAG.getUNDEF(SrcVT));
else {
SDValue Trunc =
DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
Ops.push_back(Trunc);
}
} else
Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
}
unsigned Opcode;
if (FirstConversion == PPCISD::FCTIDZ ||
FirstConversion == PPCISD::FCTIWZ)
Opcode = ISD::FP_TO_SINT;
else
Opcode = ISD::FP_TO_UINT;
EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
return DAG.getNode(Opcode, dl, TargetVT, BV);
}
return SDValue();
}
/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
/// of the vector type if the loads are consecutive. If the loads are
/// consecutive but in descending order, a shuffle is added at the end
/// to reorder the vector.
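/// For example:
///   (build_vector (load a), (load a+4), (load a+8), (load a+12))
/// becomes a single v4i32 load from a; if the loads instead appear in
/// descending address order, the wide load is followed by a reversing shuffle.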
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
"Should be called with a BUILD_VECTOR node");
SDLoc dl(N);
// Return early for non-byte-sized types, as they can't be consecutive.
if (!N->getValueType(0).getVectorElementType().isByteSized())
return SDValue();
bool InputsAreConsecutiveLoads = true;
bool InputsAreReverseConsecutive = true;
unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
SDValue FirstInput = N->getOperand(0);
bool IsRoundOfExtLoad = false;
LoadSDNode *FirstLoad = nullptr;
if (FirstInput.getOpcode() == ISD::FP_ROUND &&
FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
}
// Not a build vector of (possibly fp_rounded) loads.
if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
N->getNumOperands() == 1)
return SDValue();
if (!IsRoundOfExtLoad)
FirstLoad = cast<LoadSDNode>(FirstInput);
SmallVector<LoadSDNode *, 4> InputLoads;
InputLoads.push_back(FirstLoad);
for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
// If any inputs are fp_round(extload), they all must be.
if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
return SDValue();
SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
N->getOperand(i);
if (NextInput.getOpcode() != ISD::LOAD)
return SDValue();
SDValue PreviousInput =
IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
// If any inputs are fp_round(extload), they all must be.
if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
return SDValue();
// We only care about regular loads. The PPC-specific load intrinsics
// will not lead to a merge opportunity.
if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
InputsAreConsecutiveLoads = false;
if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
InputsAreReverseConsecutive = false;
// Exit early if the loads are neither consecutive nor reverse consecutive.
if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
return SDValue();
InputLoads.push_back(LD2);
}
assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
"The loads cannot be both consecutive and reverse consecutive.");
SDValue WideLoad;
SDValue ReturnSDVal;
if (InputsAreConsecutiveLoads) {
assert(FirstLoad && "Input needs to be a LoadSDNode.");
WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
FirstLoad->getAlign());
ReturnSDVal = WideLoad;
} else if (InputsAreReverseConsecutive) {
LoadSDNode *LastLoad = InputLoads.back();
assert(LastLoad && "Input needs to be a LoadSDNode.");
WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
LastLoad->getAlign());
SmallVector<int, 16> Ops;
for (int i = N->getNumOperands() - 1; i >= 0; i--)
Ops.push_back(i);
ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
DAG.getUNDEF(N->getValueType(0)), Ops);
} else
return SDValue();
for (auto *LD : InputLoads)
DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
return ReturnSDVal;
}
// This function adds the vector_shuffle needed to get the elements of the
// vector extracts into the correct positions, as specified by the
// CorrectElems encoding.
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
SDValue Input, uint64_t Elems,
uint64_t CorrectElems) {
SDLoc dl(N);
unsigned NumElems = Input.getValueType().getVectorNumElements();
SmallVector<int, 16> ShuffleMask(NumElems, -1);
// Knowing the element indices being extracted from the original
// vector and the order in which they're being inserted, just put
// them at the element indices required for the instruction.
for (unsigned i = 0; i < N->getNumOperands(); i++) {
if (DAG.getDataLayout().isLittleEndian())
ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
else
ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
CorrectElems = CorrectElems >> 8;
Elems = Elems >> 8;
}
SDValue Shuffle =
DAG.getVectorShuffle(Input.getValueType(), dl, Input,
DAG.getUNDEF(Input.getValueType()), ShuffleMask);
EVT VT = N->getValueType(0);
SDValue Conv = DAG.getBitcast(VT, Shuffle);
EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
Input.getValueType().getVectorElementType(),
VT.getVectorNumElements());
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
DAG.getValueType(ExtVT));
}
// Look for build vector patterns where the input operands come from sign
// extended vector_extract elements at specific indices. If the correct
// indices aren't used, add a vector shuffle to fix up the indices, and create
// a SIGN_EXTEND_INREG node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
// This array encodes the indices that the vector sign extend instructions
// extract from when extending from one type to another for both BE and LE.
// The right nibble of each byte corresponds to the LE indices,
// and the left nibble of each byte corresponds to the BE indices.
// For example: 0x3074B8FC byte->word
// For LE: the allowed indices are: 0x0,0x4,0x8,0xC
// For BE: the allowed indices are: 0x3,0x7,0xB,0xF
// For example: 0x000070F8 byte->double word
// For LE: the allowed indices are: 0x0,0x8
// For BE: the allowed indices are: 0x7,0xF
uint64_t TargetElems[] = {
0x3074B8FC, // b->w
0x000070F8, // b->d
0x10325476, // h->w
0x00003074, // h->d
0x00001032, // w->d
};
uint64_t Elems = 0;
int Index;
SDValue Input;
auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
if (!Op)
return false;
if (Op.getOpcode() != ISD::SIGN_EXTEND &&
Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
return false;
// A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
// of the right width.
SDValue Extract = Op.getOperand(0);
if (Extract.getOpcode() == ISD::ANY_EXTEND)
Extract = Extract.getOperand(0);
if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return false;
ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
if (!ExtOp)
return false;
Index = ExtOp->getZExtValue();
if (Input && Input != Extract.getOperand(0))
return false;
if (!Input)
Input = Extract.getOperand(0);
Elems = Elems << 8;
Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
Elems |= Index;
return true;
};
// If the build vector operands aren't sign-extended vector extracts
// of the same input vector, then return.
for (unsigned i = 0; i < N->getNumOperands(); i++) {
if (!isSExtOfVecExtract(N->getOperand(i))) {
return SDValue();
}
}
// If the vector extract indices are not correct, add the appropriate
// vector_shuffle.
int TgtElemArrayIdx;
int InputSize = Input.getValueType().getScalarSizeInBits();
int OutputSize = N->getValueType(0).getScalarSizeInBits();
if (InputSize + OutputSize == 40)
TgtElemArrayIdx = 0;
else if (InputSize + OutputSize == 72)
TgtElemArrayIdx = 1;
else if (InputSize + OutputSize == 48)
TgtElemArrayIdx = 2;
else if (InputSize + OutputSize == 80)
TgtElemArrayIdx = 3;
else if (InputSize + OutputSize == 96)
TgtElemArrayIdx = 4;
else
return SDValue();
uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
CorrectElems = DAG.getDataLayout().isLittleEndian()
? CorrectElems & 0x0F0F0F0F0F0F0F0F
: CorrectElems & 0xF0F0F0F0F0F0F0F0;
if (Elems != CorrectElems) {
return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
}
// Regular lowering will catch cases where a shuffle is not needed.
return SDValue();
}
// Look for the pattern of a load from a narrow width to i128, feeding
// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
// (LXVRZX). This node represents a zero extending load that will be matched
// to the Load VSX Vector Rightmost instructions.
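// For example, (v1i128 (build_vector (i128 zextload i32 ptr))) becomes
// (LXVRZX chain, ptr, 32), which is then matched to one of the Load VSX
// Vector Rightmost instructions (lxvrbx/lxvrhx/lxvrwx/lxvrdx).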
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
// This combine is only eligible for a BUILD_VECTOR of v1i128.
if (N->getValueType(0) != MVT::v1i128)
return SDValue();
SDValue Operand = N->getOperand(0);
// Proceed with the transformation if the operand to the BUILD_VECTOR
// is a load instruction.
if (Operand.getOpcode() != ISD::LOAD)
return SDValue();
auto *LD = cast<LoadSDNode>(Operand);
EVT MemoryType = LD->getMemoryVT();
// This transformation is only valid if we are loading either a byte,
// a halfword, a word, or a doubleword.
bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
MemoryType == MVT::i32 || MemoryType == MVT::i64;
// Ensure that the load from the narrow width is being zero extended to i128.
if (!ValidLDType ||
(LD->getExtensionType() != ISD::ZEXTLOAD &&
LD->getExtensionType() != ISD::EXTLOAD))
return SDValue();
SDValue LoadOps[] = {
LD->getChain(), LD->getBasePtr(),
DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
DAG.getVTList(MVT::v1i128, MVT::Other),
LoadOps, MemoryType, LD->getMemOperand());
}
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
"Should be called with a BUILD_VECTOR node");
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
if (!Subtarget.hasVSX())
return SDValue();
// The target independent DAG combiner will leave a build_vector of
// float-to-int conversions intact. We can generate MUCH better code for
// a float-to-int conversion of a vector of floats.
SDValue FirstInput = N->getOperand(0);
if (FirstInput.getOpcode() == PPCISD::MFVSR) {
SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
if (Reduced)
return Reduced;
}
// If we're building a vector out of consecutive loads, just load that
// vector type.
SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
if (Reduced)
return Reduced;
// If we're building a vector out of extended elements from another vector,
// we can use the P9 vector integer extend instructions. The code assumes legal
// input types (i.e. it can't handle things like v4i16) so do not run before
// legalization.
if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
Reduced = combineBVOfVecSExt(N, DAG);
if (Reduced)
return Reduced;
}
// On Power10, the Load VSX Vector Rightmost instructions can be utilized
// if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
// is a load from <valid narrow width> to i128.
if (Subtarget.isISA3_1()) {
SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
if (BVOfZLoad)
return BVOfZLoad;
}
if (N->getValueType(0) != MVT::v2f64)
return SDValue();
// Looking for:
// (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
FirstInput.getOpcode() != ISD::UINT_TO_FP)
return SDValue();
if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
return SDValue();
if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
return SDValue();
SDValue Ext1 = FirstInput.getOperand(0);
SDValue Ext2 = N->getOperand(1).getOperand(0);
if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
if (!Ext1Op || !Ext2Op)
return SDValue();
if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
Ext1.getOperand(0) != Ext2.getOperand(0))
return SDValue();
int FirstElem = Ext1Op->getZExtValue();
int SecondElem = Ext2Op->getZExtValue();
int SubvecIdx;
if (FirstElem == 0 && SecondElem == 1)
SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
else if (FirstElem == 2 && SecondElem == 3)
SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
else
return SDValue();
SDValue SrcVec = Ext1.getOperand(0);
auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
return DAG.getNode(NodeType, dl, MVT::v2f64,
SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
}
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
DAGCombinerInfo &DCI) const {
assert((N->getOpcode() == ISD::SINT_TO_FP ||
N->getOpcode() == ISD::UINT_TO_FP) &&
"Need an int -> FP conversion node here");
if (useSoftFloat() || !Subtarget.has64BitSupport())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
SDValue Op(N, 0);
// Don't handle ppc_fp128 here, or conversions that are out-of-range capable
// for the hardware.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
if (!Op.getOperand(0).getValueType().isSimple())
return SDValue();
if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
return SDValue();
SDValue FirstOperand(Op.getOperand(0));
bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
(FirstOperand.getValueType() == MVT::i8 ||
FirstOperand.getValueType() == MVT::i16);
if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
bool DstDouble = Op.getValueType() == MVT::f64;
unsigned ConvOp = Signed ?
(DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
(DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
SDValue WidthConst =
DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
dl, false);
LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i8, LDN->getMemOperand());
DAG.makeEquivalentMemoryOrdering(LDN, Ld);
// For signed conversion, we need to sign-extend the value in the VSR
if (Signed) {
SDValue ExtOps[] = { Ld, WidthConst };
SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
} else
return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
}
// For i32 intermediate values, unfortunately, the conversion functions
// leave the upper 32 bits of the value undefined. Within the set of
// scalar instructions, we have no method for zero- or sign-extending the
// value. Thus, we cannot handle i32 intermediate values here.
if (Op.getOperand(0).getValueType() == MVT::i32)
return SDValue();
assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
: PPCISD::FCFIDS)
: (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
: PPCISD::FCFID);
MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
? MVT::f32
: MVT::f64;
// If we're converting from a float to an int and back to a float again,
// then we don't need the store/load pair at all.
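// For example, (f64 (sint_to_fp (fp_to_sint f64 %x))) becomes an fctidz
// followed by an fcfid, with no stack temporary in between.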
if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
Subtarget.hasFPCVT()) ||
(Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
SDValue Src = Op.getOperand(0).getOperand(0);
if (Src.getValueType() == MVT::f32) {
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
DCI.AddToWorklist(Src.getNode());
} else if (Src.getValueType() != MVT::f64) {
// Make sure that we don't pick up a ppc_fp128 source value.
return SDValue();
}
unsigned FCTOp =
Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
PPCISD::FCTIDUZ;
SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
DCI.AddToWorklist(FP.getNode());
}
return FP;
}
return SDValue();
}
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
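// For example, an LE v4i32 load is expanded to an lxvd2x followed by an
// xxswapd, plus a bitcast from v2f64 back to v4i32 when needed.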
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
DAGCombinerInfo &DCI) const {
// Delay VSX load for LE combine until after LegalizeOps to prioritize other
// load combines.
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
SDValue Chain;
SDValue Base;
MachineMemOperand *MMO;
switch (N->getOpcode()) {
default:
llvm_unreachable("Unexpected opcode for little endian VSX load");
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(N);
Chain = LD->getChain();
Base = LD->getBasePtr();
MMO = LD->getMemOperand();
// If the MMO suggests this isn't a load of a full vector, leave
// things alone. For a built-in, we have to make the change for
// correctness, so if there is a size problem, that will be a bug.
if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
return SDValue();
break;
}
case ISD::INTRINSIC_W_CHAIN: {
MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
Chain = Intrin->getChain();
// Similarly to the store case below, Intrin->getBasePtr() doesn't get
// us what we want. Get operand 2 instead.
Base = Intrin->getOperand(2);
MMO = Intrin->getMemOperand();
break;
}
}
MVT VecTy = N->getValueType(0).getSimpleVT();
SDValue LoadOps[] = { Chain, Base };
SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
DAG.getVTList(MVT::v2f64, MVT::Other),
LoadOps, MVT::v2f64, MMO);
DCI.AddToWorklist(Load.getNode());
Chain = Load.getValue(1);
SDValue Swap = DAG.getNode(
PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
DCI.AddToWorklist(Swap.getNode());
// Add a bitcast if the resulting load type doesn't match v2f64.
if (VecTy != MVT::v2f64) {
SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
DCI.AddToWorklist(N.getNode());
// Package {bitcast value, swap's chain} to match Load's shape.
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
N, Swap.getValue(1));
}
return Swap;
}
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
DAGCombinerInfo &DCI) const {
// Delay VSX store for LE combine until after LegalizeOps to prioritize other
// store combines.
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
SDValue Chain;
SDValue Base;
unsigned SrcOpnd;
MachineMemOperand *MMO;
switch (N->getOpcode()) {
default:
llvm_unreachable("Unexpected opcode for little endian VSX store");
case ISD::STORE: {
StoreSDNode *ST = cast<StoreSDNode>(N);
Chain = ST->getChain();
Base = ST->getBasePtr();
MMO = ST->getMemOperand();
SrcOpnd = 1;
// If the MMO suggests this isn't a store of a full vector, leave
// things alone. For a built-in, we have to make the change for
// correctness, so if there is a size problem, it is a bug.
if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
return SDValue();
break;
}
case ISD::INTRINSIC_VOID: {
MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
Chain = Intrin->getChain();
// As in the load case above, Intrin->getBasePtr() doesn't give us what we
// want; get operand 3 instead.
Base = Intrin->getOperand(3);
MMO = Intrin->getMemOperand();
SrcOpnd = 2;
break;
}
}
SDValue Src = N->getOperand(SrcOpnd);
MVT VecTy = Src.getValueType().getSimpleVT();
// All stores are done as v2f64, with a bitcast if needed.
if (VecTy != MVT::v2f64) {
Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
DCI.AddToWorklist(Src.getNode());
}
SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
DCI.AddToWorklist(Swap.getNode());
Chain = Swap.getValue(1);
SDValue StoreOps[] = { Chain, Swap, Base };
SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
DAG.getVTList(MVT::Other),
StoreOps, VecTy, MMO);
DCI.AddToWorklist(Store.getNode());
return Store;
}
// Handle DAG combine for STORE (FP_TO_INT F).
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
unsigned Opcode = N->getOperand(1).getOpcode();
(void)Opcode;
bool Strict = N->getOperand(1)->isStrictFPOpcode();
assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
&& "Not an FP_TO_INT instruction!");
SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
EVT Op1VT = N->getOperand(1).getValueType();
EVT ResVT = Val.getValueType();
if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
return SDValue();
// Only perform the combine for conversions to i64/i32, or i16/i8 on Power9.
bool ValidTypeForStoreFltAsInt =
(Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
(Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
// TODO: Lower conversion from f128 on all VSX targets
if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
return SDValue();
if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
return SDValue();
Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
// Set number of bytes being converted.
unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
DAG.getIntPtrConstant(ByteSize, dl, false),
DAG.getValueType(Op1VT)};
Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
DAG.getVTList(MVT::Other), Ops,
cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
return Val;
}
static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
// Check that the source of the element keeps flipping
// (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts and vice versa).
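// e.g. for 16 elements, <0,16,1,17,...> alternates and is accepted, while
// <0,1,16,17,...> repeats a source and is rejected.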
bool PrevElemFromFirstVec = Mask[0] < NumElts;
for (int i = 1, e = Mask.size(); i < e; i++) {
if (PrevElemFromFirstVec && Mask[i] < NumElts)
return false;
if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
return false;
PrevElemFromFirstVec = !PrevElemFromFirstVec;
}
return true;
}
static bool isSplatBV(SDValue Op) {
if (Op.getOpcode() != ISD::BUILD_VECTOR)
return false;
SDValue FirstOp;
// Find first non-undef input.
for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
FirstOp = Op.getOperand(i);
if (!FirstOp.isUndef())
break;
}
// All inputs are undef or the same as the first non-undef input.
for (int i = 1, e = Op.getNumOperands(); i < e; i++)
if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
return false;
return true;
}
static SDValue isScalarToVec(SDValue Op) {
if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
return Op;
if (Op.getOpcode() != ISD::BITCAST)
return SDValue();
Op = Op.getOperand(0);
if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
return Op;
return SDValue();
}
// Fix up the shuffle mask to account for the fact that the result of
// scalar_to_vector is not in lane zero. This just takes all values in
// the ranges specified by the min/max indices and adds the number of
// elements required to ensure each element comes from the respective
// position in the valid lane.
// On little endian, that's just the corresponding element in the other
// half of the vector. On big endian, it is in the same half but right
// justified rather than left justified in that half.
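// For example, with a v4i32 shuffle (HalfVec = 2), an affected index gains
// 2 on little endian; on big endian it gains 2 - ValidLaneWidth, i.e. 1
// when the valid lane is a single shuffle element.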
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
int LHSMaxIdx, int RHSMinIdx,
int RHSMaxIdx, int HalfVec,
unsigned ValidLaneWidth,
const PPCSubtarget &Subtarget) {
for (int i = 0, e = ShuffV.size(); i < e; i++) {
int Idx = ShuffV[i];
if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
ShuffV[i] +=
Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
}
}
// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
// the original is:
// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
// In such a case, just change the shuffle mask to extract the element
// from the permuted index.
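// e.g. (v4i32 (scalar_to_vector (i32 (extract_elt v4i32 %a, 1)))) becomes
// a shuffle of %a with mask <-1,-1,1,-1> on little endian: element 1 is
// placed in lane NumElts / 2 = 2.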
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) {
SDLoc dl(OrigSToV);
EVT VT = OrigSToV.getValueType();
assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
"Expecting a SCALAR_TO_VECTOR here");
SDValue Input = OrigSToV.getOperand(0);
if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
SDValue OrigVector = Input.getOperand(0);
// Can't handle non-const element indices or different vector types
// for the input to the extract and the output of the scalar_to_vector.
if (Idx && VT == OrigVector.getValueType()) {
unsigned NumElts = VT.getVectorNumElements();
assert(
NumElts > 1 &&
"Cannot produce a permuted scalar_to_vector for one element vector");
SmallVector<int, 16> NewMask(NumElts, -1);
unsigned ResultInElt = NumElts / 2;
ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
NewMask[ResultInElt] = Idx->getZExtValue();
return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
}
}
return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
OrigSToV.getOperand(0));
}
// On little endian subtargets, combine shuffles such as:
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
// into:
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
// because the latter can be matched to a single instruction merge.
// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
// to put the value into element zero. Adjust the shuffle mask so that the
// vector can remain in permuted form (to prevent a swap prior to a shuffle).
// On big endian targets, this is still useful for SCALAR_TO_VECTOR
// nodes with elements smaller than doubleword because all the ways
// of getting scalar data into a vector register put the value in the
// rightmost element of the left half of the vector.
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG) const {
SDValue LHS = SVN->getOperand(0);
SDValue RHS = SVN->getOperand(1);
auto Mask = SVN->getMask();
int NumElts = LHS.getValueType().getVectorNumElements();
SDValue Res(SVN, 0);
SDLoc dl(SVN);
bool IsLittleEndian = Subtarget.isLittleEndian();
// On big endian targets this is only useful for subtargets with direct moves.
// On little endian targets it would be useful for all subtargets with VSX.
// However adding special handling for LE subtargets without direct moves
// would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
// which includes direct moves.
if (!Subtarget.hasDirectMove())
return Res;
// If this is not a shuffle of a shuffle and the first element comes from
// the second vector, canonicalize to the commuted form. This will make it
// more likely to match one of the single instruction patterns.
if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
std::swap(LHS, RHS);
Res = DAG.getCommutedVectorShuffle(*SVN);
Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
}
// Adjust the shuffle mask if either input vector comes from a
// SCALAR_TO_VECTOR and keep the respective input vector in permuted
// form (to prevent the need for a swap).
SmallVector<int, 16> ShuffV(Mask);
SDValue SToVLHS = isScalarToVec(LHS);
SDValue SToVRHS = isScalarToVec(RHS);
if (SToVLHS || SToVRHS) {
// FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
// same type and have differing element sizes, then do not perform
// the following transformation. The current transformation for
// SCALAR_TO_VECTOR assumes that both input vectors have the same
// element size. This will be updated in the future to account for
// differing sizes of the LHS and RHS.
if (SToVLHS && SToVRHS &&
(SToVLHS.getValueType().getScalarSizeInBits() !=
SToVRHS.getValueType().getScalarSizeInBits()))
return Res;
int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
: SToVRHS.getValueType().getVectorNumElements();
int NumEltsOut = ShuffV.size();
// The width of the "valid lane" (i.e. the lane that contains the value that
// is vectorized) needs to be expressed in terms of the number of elements
// of the shuffle. It is thereby the ratio of the values before and after
// any bitcast.
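// e.g. a v2i64 scalar_to_vector bitcast to v4i32 gives
// ValidLaneWidth = 64 / 32 = 2.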
unsigned ValidLaneWidth =
SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
LHS.getValueType().getScalarSizeInBits()
: SToVRHS.getValueType().getScalarSizeInBits() /
RHS.getValueType().getScalarSizeInBits();
// Initially assume that neither input is permuted. These will be adjusted
// accordingly if either input is.
int LHSMaxIdx = -1;
int RHSMinIdx = -1;
int RHSMaxIdx = -1;
int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
// Get the permuted scalar to vector nodes for the source(s) that come from
// ISD::SCALAR_TO_VECTOR.
// On big endian systems, this only makes sense for element sizes smaller
// than 64 bits since for 64-bit elements, all instructions already put
// the value into element zero. Since the scalar sizes of LHS and RHS may
// differ after isScalarToVec, each side is checked using its own size.
if (SToVLHS) {
if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
return Res;
// Set up the values for the shuffle vector fixup.
LHSMaxIdx = NumEltsOut / NumEltsIn;
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
if (SToVLHS.getValueType() != LHS.getValueType())
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
LHS = SToVLHS;
}
if (SToVRHS) {
if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
return Res;
RHSMinIdx = NumEltsOut;
RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
if (SToVRHS.getValueType() != RHS.getValueType())
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
RHS = SToVRHS;
}
// Fix up the shuffle mask to reflect where the desired element actually is.
// The minimum and maximum indices that correspond to element zero for both
// the LHS and RHS are computed and will control which shuffle mask entries
// are to be changed. For example, if the RHS is permuted, any shuffle mask
// entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
HalfVec, ValidLaneWidth, Subtarget);
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
// We may have simplified away the shuffle. We won't be able to do anything
// further with it here.
if (!isa<ShuffleVectorSDNode>(Res))
return Res;
Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
}
SDValue TheSplat = IsLittleEndian ? RHS : LHS;
// The common case after we commuted the shuffle is that the RHS is a splat
// and we have elements coming in from the splat at indices that are not
// conducive to using a merge.
// Example:
// vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
if (!isSplatBV(TheSplat))
return Res;
// We are looking for a mask such that all even elements are from
// one vector and all odd elements from the other.
if (!isAlternatingShuffMask(Mask, NumElts))
return Res;
// Adjust the mask so we are pulling in the same index from the splat
// as the index from the interesting vector in consecutive elements.
if (IsLittleEndian) {
// Example (even elements from first vector):
// vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
if (Mask[0] < NumElts)
for (int i = 1, e = Mask.size(); i < e; i += 2) {
if (ShuffV[i] < 0)
continue;
// If element from non-splat is undef, pick first element from splat.
ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
}
// Example (odd elements from first vector):
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
else
for (int i = 0, e = Mask.size(); i < e; i += 2) {
if (ShuffV[i] < 0)
continue;
// If element from non-splat is undef, pick first element from splat.
ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
}
} else {
// Example (even elements from first vector):
// vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
if (Mask[0] < NumElts)
for (int i = 0, e = Mask.size(); i < e; i += 2) {
if (ShuffV[i] < 0)
continue;
// If element from non-splat is undef, pick first element from splat.
ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
}
// Example (odd elements from first vector):
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
else
for (int i = 1, e = Mask.size(); i < e; i += 2) {
if (ShuffV[i] < 0)
continue;
// If element from non-splat is undef, pick first element from splat.
ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
}
}
// If the RHS has undefs, we need to remove them since we may have created
// a shuffle that adds those instead of the splat value.
SDValue SplatVal =
cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
if (IsLittleEndian)
RHS = TheSplat;
else
LHS = TheSplat;
return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
}
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
LSBaseSDNode *LSBase,
DAGCombinerInfo &DCI) const {
assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
"Not a reverse memop pattern!");
auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
auto Mask = SVN->getMask();
int i = 0;
auto I = Mask.rbegin();
auto E = Mask.rend();
for (; I != E; ++I) {
if (*I != i)
return false;
i++;
}
return true;
};
SelectionDAG &DAG = DCI.DAG;
EVT VT = SVN->getValueType(0);
if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
return SDValue();
// Before Power9, the PPCVSXSwapRemoval pass adjusts the element order;
// see the comment in PPCVSXSwapRemoval.cpp. This combine conflicts with
// that optimization, so we don't do it there.
if (!Subtarget.hasP9Vector())
return SDValue();
if (!IsElementReverse(SVN))
return SDValue();
if (LSBase->getOpcode() == ISD::LOAD) {
// If result 0 of the load has any user other than the shufflevector
// instruction, it is not profitable to replace the shufflevector with
// a reverse load.
for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
UI != UE; ++UI)
if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
return SDValue();
SDLoc dl(LSBase);
SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
return DAG.getMemIntrinsicNode(
PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
LSBase->getMemoryVT(), LSBase->getMemOperand());
}
if (LSBase->getOpcode() == ISD::STORE) {
// If there are other uses of the shuffle, the swap cannot be avoided.
// Forcing the use of an X-Form (since swapped stores only have
// X-Forms) without removing the swap is unprofitable.
if (!SVN->hasOneUse())
return SDValue();
SDLoc dl(LSBase);
SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
LSBase->getBasePtr()};
return DAG.getMemIntrinsicNode(
PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
LSBase->getMemoryVT(), LSBase->getMemOperand());
}
llvm_unreachable("Expected a load or store node here");
}
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
if (IntrinsicID == Intrinsic::ppc_stdcx)
StoreWidth = 8;
else if (IntrinsicID == Intrinsic::ppc_stwcx)
StoreWidth = 4;
else if (IntrinsicID == Intrinsic::ppc_sthcx)
StoreWidth = 2;
else if (IntrinsicID == Intrinsic::ppc_stbcx)
StoreWidth = 1;
else
return false;
return true;
}
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
return combineADD(N, DCI);
case ISD::AND: {
// We don't want (and (zext (shift...)), C) if C fits in the width of the
// original input as that will prevent us from selecting optimal rotates.
// This only matters if the input to the extend is i32 widened to i64.
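// e.g. (and (zext (srl i32 %x, 4)):i64, 255) is rewritten as
// (zext (and (srl i32 %x, 4), 255)):i64 so the shift and mask can be
// matched as a single 32-bit rotate-and-mask (rlwinm).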
SDValue Op1 = N->getOperand(0);
SDValue Op2 = N->getOperand(1);
if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
Op1.getOpcode() != ISD::ANY_EXTEND) ||
!isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
Op1.getOperand(0).getValueType() != MVT::i32)
break;
SDValue NarrowOp = Op1.getOperand(0);
if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
break;
uint64_t Imm = Op2->getAsZExtVal();
// Make sure that the constant is narrow enough to fit in the narrow type.
if (!isUInt<32>(Imm))
break;
SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
}
case ISD::SHL:
return combineSHL(N, DCI);
case ISD::SRA:
return combineSRA(N, DCI);
case ISD::SRL:
return combineSRL(N, DCI);
case ISD::MUL:
return combineMUL(N, DCI);
case ISD::FMA:
case PPCISD::FNMSUB:
return combineFMALike(N, DCI);
case PPCISD::SHL:
if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
break;
case PPCISD::SRL:
if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
return N->getOperand(0);
break;
case PPCISD::SRA:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
if (C->isZero() || // 0 >>s V -> 0.
C->isAllOnes()) // -1 >>s V -> -1.
return N->getOperand(0);
}
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
return DAGCombineExtBoolTrunc(N, DCI);
case ISD::TRUNCATE:
return combineTRUNCATE(N, DCI);
case ISD::SETCC:
if (SDValue CSCC = combineSetCC(N, DCI))
return CSCC;
[[fallthrough]];
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return combineFPToIntToFP(N, DCI);
case ISD::VECTOR_SHUFFLE:
if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
}
return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
case ISD::STORE: {
EVT Op1VT = N->getOperand(1).getValueType();
unsigned Opcode = N->getOperand(1).getOpcode();
if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
SDValue Val = combineStoreFPToInt(N, DCI);
if (Val)
return Val;
}
if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
if (Val)
return Val;
}
// Turn STORE (BSWAP) -> sthbrx/stwbrx.
if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
(Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
// STBRX can only handle simple types, and it makes no sense to store
// fewer than two bytes in byte-reversed order.
EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
if (mVT.isExtended() || mVT.getSizeInBits() < 16)
break;
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
if (BSwapOp.getValueType() == MVT::i16)
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
// If the type of the BSWAP operand is wider than the stored memory width,
// it needs to be shifted right before STBRX.
if (Op1VT.bitsGT(mVT)) {
int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
DAG.getConstant(Shift, dl, MVT::i32));
// Need to truncate if this is a bswap of i64 stored as i32/i16.
if (Op1VT == MVT::i64)
BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
}
SDValue Ops[] = {
N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
};
return
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
Ops, cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
}
// STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
// This increases the chance of CSE'ing constant construction.
if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
// Need to sign-extend to 64 bits to handle negative values.
EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
MemVT.getSizeInBits());
SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
// DAG.getTruncStore() can't be used here because it doesn't accept
// the general (base + offset) addressing mode.
// So we use UpdateNodeOperands and setTruncatingStore instead.
DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
N->getOperand(3));
cast<StoreSDNode>(N)->setTruncatingStore(true);
return SDValue(N, 0);
}
// For little endian, VSX stores require generating xxswapd/stxvd2x.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
if (Op1VT.isSimple()) {
MVT StoreVT = Op1VT.getSimpleVT();
if (Subtarget.needsSwapsForVSXMemOps() &&
(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
return expandVSXStoreForLE(N, DCI);
}
break;
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT VT = LD->getValueType(0);
// For little endian, VSX loads require generating lxvd2x/xxswapd.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
if (VT.isSimple()) {
MVT LoadVT = VT.getSimpleVT();
if (Subtarget.needsSwapsForVSXMemOps() &&
(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
return expandVSXLoadForLE(N, DCI);
}
// We sometimes end up with a 64-bit integer load, from which we extract
// two single-precision floating-point numbers. This happens with
// std::complex<float>, and other similar structures, because of the way we
// canonicalize structure copies. However, if we lack direct moves,
// then the final bitcasts from the extracted integer values to the
// floating-point numbers turn into store/load pairs. Even with direct moves,
// just loading the two floating-point numbers is likely better.
auto ReplaceTwoFloatLoad = [&]() {
if (VT != MVT::i64)
return false;
if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
LD->isVolatile())
return false;
// We're looking for a sequence like this:
// t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
// t16: i64 = srl t13, Constant:i32<32>
// t17: i32 = truncate t16
// t18: f32 = bitcast t17
// t19: i32 = truncate t13
// t20: f32 = bitcast t19
if (!LD->hasNUsesOfValue(2, 0))
return false;
auto UI = LD->use_begin();
while (UI.getUse().getResNo() != 0) ++UI;
SDNode *Trunc = *UI++;
while (UI.getUse().getResNo() != 0) ++UI;
SDNode *RightShift = *UI;
if (Trunc->getOpcode() != ISD::TRUNCATE)
std::swap(Trunc, RightShift);
if (Trunc->getOpcode() != ISD::TRUNCATE ||
Trunc->getValueType(0) != MVT::i32 ||
!Trunc->hasOneUse())
return false;
if (RightShift->getOpcode() != ISD::SRL ||
!isa<ConstantSDNode>(RightShift->getOperand(1)) ||
RightShift->getConstantOperandVal(1) != 32 ||
!RightShift->hasOneUse())
return false;
SDNode *Trunc2 = *RightShift->use_begin();
if (Trunc2->getOpcode() != ISD::TRUNCATE ||
Trunc2->getValueType(0) != MVT::i32 ||
!Trunc2->hasOneUse())
return false;
SDNode *Bitcast = *Trunc->use_begin();
SDNode *Bitcast2 = *Trunc2->use_begin();
if (Bitcast->getOpcode() != ISD::BITCAST ||
Bitcast->getValueType(0) != MVT::f32)
return false;
if (Bitcast2->getOpcode() != ISD::BITCAST ||
Bitcast2->getValueType(0) != MVT::f32)
return false;
if (Subtarget.isLittleEndian())
std::swap(Bitcast, Bitcast2);
// Bitcast has the second float (in memory-layout order) and Bitcast2
// has the first one.
SDValue BasePtr = LD->getBasePtr();
if (LD->isIndexed()) {
assert(LD->getAddressingMode() == ISD::PRE_INC &&
"Non-pre-inc AM on PPC?");
BasePtr =
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
LD->getOffset());
}
auto MMOFlags =
LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
LD->getPointerInfo(), LD->getAlign(),
MMOFlags, LD->getAAInfo());
SDValue AddPtr =
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr, DAG.getIntPtrConstant(4, dl));
SDValue FloatLoad2 = DAG.getLoad(
MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
LD->getPointerInfo().getWithOffset(4),
commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
if (LD->isIndexed()) {
// Note that DAGCombine should re-form any pre-increment load(s) from
// what is produced here if that makes sense.
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
}
DCI.CombineTo(Bitcast2, FloatLoad);
DCI.CombineTo(Bitcast, FloatLoad2);
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
SDValue(FloatLoad2.getNode(), 1));
return true;
};
if (ReplaceTwoFloatLoad())
return SDValue(N, 0);
EVT MemVT = LD->getMemoryVT();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
if (LD->isUnindexed() && VT.isVector() &&
((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
// P8 and later hardware should just use LOAD.
!Subtarget.hasP8Vector() &&
(VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v4f32))) &&
LD->getAlign() < ABIAlignment) {
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
bool isLittleEndian = Subtarget.isLittleEndian();
// This implements the loading of unaligned vectors as described in
// the venerable Apple Velocity Engine overview. Specifically:
// https://developer.apple.com/hardwaredrivers/ve/alignment.html
// https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
//
// The general idea is to expand a sequence of one or more unaligned
// loads into an alignment-based permutation-control instruction (lvsl
// or lvsr), a series of regular vector loads (which always truncate
// their input address to an aligned address), and a series of
// permutations. The results of these permutations are the requested
// loaded values. The trick is that the last "extra" load is not taken
// from the address you might suspect (sizeof(vector) bytes after the
// last requested load), but rather sizeof(vector) - 1 bytes after the
// last requested vector. The point of this is to avoid a page fault if
// the base address happened to be aligned. This works because if the
// base address is aligned, then adding less than a full vector length
// will cause the last vector in the sequence to be (re)loaded.
// Otherwise, the next vector will be fetched as you might suspect was
// necessary.
// We might be able to reuse the permutation generation from
// a different base address offset from this one by an aligned amount.
// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
// optimization later.
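// Illustratively (a sketch, not the exact DAG), an unaligned v4i32 load
// at p expands to roughly:
//   cntl = lvsl(p); lo = lvx(p); hi = lvx(p + sizeof(vector) - 1);
//   result = vperm(lo, hi, cntl)
// with little endian using lvsr and swapped vperm inputs, as below.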
Intrinsic::ID Intr, IntrLD, IntrPerm;
MVT PermCntlTy, PermTy, LDTy;
Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
: Intrinsic::ppc_altivec_lvsl;
IntrLD = Intrinsic::ppc_altivec_lvx;
IntrPerm = Intrinsic::ppc_altivec_vperm;
PermCntlTy = MVT::v16i8;
PermTy = MVT::v4i32;
LDTy = MVT::v4i32;
SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
// Create the new MMO for the new base load. It is like the original MMO,
// but represents an area in memory almost twice the vector size centered
// on the original address. If the address is unaligned, we might start
// reading up to (sizeof(vector)-1) bytes below the address of the
// original unaligned load.
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *BaseMMO =
MF.getMachineMemOperand(LD->getMemOperand(),
-(int64_t)MemVT.getStoreSize()+1,
2*MemVT.getStoreSize()-1);
// Create the new base load.
SDValue LDXIntID =
DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue BaseLoad =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
DAG.getVTList(PermTy, MVT::Other),
BaseLoadOps, LDTy, BaseMMO);
// Note that the value of IncOffset (which is provided to the next
// load's pointer info offset value, and thus used to calculate the
// alignment), and the value of IncValue (which is actually used to
// increment the pointer value) are different! This is because we
// require the next load to appear to be aligned, even though it
// is actually offset from the base pointer by a lesser amount.
int IncOffset = VT.getSizeInBits() / 8;
int IncValue = IncOffset;
// Walk (both up and down) the chain looking for another load at the real
// (aligned) offset (the alignment of the other load does not matter in
// this case). If found, then do not use the offset reduction trick, as
// that will prevent the loads from being later combined (as they would
// otherwise be duplicates).
if (!findConsecutiveLoad(LD, DAG))
--IncValue;
SDValue Increment =
DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
MachineMemOperand *ExtraMMO =
MF.getMachineMemOperand(LD->getMemOperand(),
1, 2*MemVT.getStoreSize()-1);
SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue ExtraLoad =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
DAG.getVTList(PermTy, MVT::Other),
ExtraLoadOps, LDTy, ExtraMMO);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
BaseLoad.getValue(1), ExtraLoad.getValue(1));
// Because vperm has a big-endian bias, we must reverse the order
// of the input vectors and complement the permute control vector
// when generating little endian code. We have already handled the
// latter by using lvsr instead of lvsl, so just reverse BaseLoad
// and ExtraLoad here.
SDValue Perm;
if (isLittleEndian)
Perm = BuildIntrinsicOp(IntrPerm,
ExtraLoad, BaseLoad, PermCntl, DAG, dl);
else
Perm = BuildIntrinsicOp(IntrPerm,
BaseLoad, ExtraLoad, PermCntl, DAG, dl);
if (VT != PermTy)
Perm = Subtarget.hasAltivec()
? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
: DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
DAG.getTargetConstant(1, dl, MVT::i64));
// second argument is 1 because this rounding
// is always exact.
// The output of the permutation is our loaded result, the TokenFactor is
// our new chain.
DCI.CombineTo(N, Perm, TF);
return SDValue(N, 0);
}
}
break;
case ISD::INTRINSIC_WO_CHAIN: {
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned IID = N->getConstantOperandVal(0);
Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
: Intrinsic::ppc_altivec_lvsl);
if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
int Bits = 4 /* 16 byte alignment */;
if (DAG.MaskedValueIsZero(Add->getOperand(1),
APInt::getAllOnes(Bits /* alignment */)
.zext(Add.getScalarValueSizeInBits()))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
for (SDNode *U : BasePtr->uses()) {
if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
U->getConstantOperandVal(0) == IID) {
// We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
return SDValue(U, 0);
}
}
}
if (isa<ConstantSDNode>(Add->getOperand(1))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
for (SDNode *U : BasePtr->uses()) {
if (U->getOpcode() == ISD::ADD &&
isa<ConstantSDNode>(U->getOperand(1)) &&
(Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
(1ULL << Bits) ==
0) {
SDNode *OtherAdd = U;
for (SDNode *V : OtherAdd->uses()) {
if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
V->getConstantOperandVal(0) == IID) {
return SDValue(V, 0);
}
}
}
}
}
}
// Combine vmaxsw/h/b(a, a's negation) to abs(a)
// to expose the vabsduw/h/b opportunity downstream.
if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
(IID == Intrinsic::ppc_altivec_vmaxsw ||
IID == Intrinsic::ppc_altivec_vmaxsh ||
IID == Intrinsic::ppc_altivec_vmaxsb)) {
SDValue V1 = N->getOperand(1);
SDValue V2 = N->getOperand(2);
if ((V1.getSimpleValueType() == MVT::v4i32 ||
V1.getSimpleValueType() == MVT::v8i16 ||
V1.getSimpleValueType() == MVT::v16i8) &&
V1.getSimpleValueType() == V2.getSimpleValueType()) {
// (0-a, a)
if (V1.getOpcode() == ISD::SUB &&
ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
V1.getOperand(1) == V2) {
return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
}
// (a, 0-a)
if (V2.getOpcode() == ISD::SUB &&
ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
V2.getOperand(1) == V1) {
return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
}
// (x-y, y-x)
if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
V1.getOperand(0) == V2.getOperand(1) &&
V1.getOperand(1) == V2.getOperand(0)) {
return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
}
}
}
}
break;
case ISD::INTRINSIC_W_CHAIN:
switch (N->getConstantOperandVal(1)) {
default:
break;
case Intrinsic::ppc_altivec_vsum4sbs:
case Intrinsic::ppc_altivec_vsum4shs:
case Intrinsic::ppc_altivec_vsum4ubs: {
// These sum-across intrinsics only have a chain due to the side effect
// that they may set the SAT bit. If we know the SAT bit will not be set
// for some inputs, we can replace any uses of their chain with the
// input chain.
if (BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
bool BVNIsConstantSplat = BVN->isConstantSplat(
APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
!Subtarget.isLittleEndian());
// If the constant splat vector is 0, the SAT bit will not be set.
if (BVNIsConstantSplat && APSplatBits == 0)
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
}
return SDValue();
}
case Intrinsic::ppc_vsx_lxvw4x:
case Intrinsic::ppc_vsx_lxvd2x:
// For little endian, VSX loads require generating lxvd2x/xxswapd.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
if (Subtarget.needsSwapsForVSXMemOps())
return expandVSXLoadForLE(N, DCI);
break;
}
break;
case ISD::INTRINSIC_VOID:
// For little endian, VSX stores require generating xxswapd/stxvd2x.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
if (Subtarget.needsSwapsForVSXMemOps()) {
switch (N->getConstantOperandVal(1)) {
default:
break;
case Intrinsic::ppc_vsx_stxvw4x:
case Intrinsic::ppc_vsx_stxvd2x:
return expandVSXStoreForLE(N, DCI);
}
}
break;
case ISD::BSWAP: {
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
// For subtargets without LDBRX, we can still do better than the default
// expansion even for 64-bit BSWAP (LOAD).
bool Is64BitBswapOn64BitTgt =
Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse();
if (IsSingleUseNormalLd &&
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
(Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
SDValue Ops[] = {
LD->getChain(), // Chain
LD->getBasePtr(), // Ptr
DAG.getValueType(N->getValueType(0)) // VT
};
SDValue BSLoad =
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
MVT::i64 : MVT::i32, MVT::Other),
Ops, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
if (N->getValueType(0) == MVT::i16)
ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
// First, combine the bswap away. This makes the value produced by the
// load dead.
DCI.CombineTo(N, ResVal);
// Next, combine the load away; we give it a bogus result value but a real
// chain result. The result value is dead because the bswap is dead.
DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
// Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
// before legalization so that the BUILD_PAIR is handled correctly.
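// i.e. (bswap (load i64 %p)) becomes two i32 loads at %p and %p+4, each
// followed by a 32-bit bswap, combined with a BUILD_PAIR whose operand
// order depends on endianness.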
if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
!IsSingleUseNormalLd)
return SDValue();
LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
// Can't split volatile or atomic loads.
if (!LD->isSimple())
return SDValue();
SDValue BasePtr = LD->getBasePtr();
SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
LD->getPointerInfo(), LD->getAlign());
Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(4, dl));
MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
LD->getMemOperand(), 4, 4);
SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
SDValue Res;
if (Subtarget.isLittleEndian())
Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
else
Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
SDValue TF =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
return Res;
}
case PPCISD::VCMP:
// If a VCMP_rec node already exists with exactly the same operands as this
// node, use its result instead of this node (VCMP_rec computes both a CR6
// and a normal output).
//
if (!N->getOperand(0).hasOneUse() &&
!N->getOperand(1).hasOneUse() &&
!N->getOperand(2).hasOneUse()) {
// Scan all of the users of the LHS, looking for VCMP_rec's that match.
SDNode *VCMPrecNode = nullptr;
SDNode *LHSN = N->getOperand(0).getNode();
for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
UI != E; ++UI)
if (UI->getOpcode() == PPCISD::VCMP_rec &&
UI->getOperand(1) == N->getOperand(1) &&
UI->getOperand(2) == N->getOperand(2) &&
UI->getOperand(0) == N->getOperand(0)) {
VCMPrecNode = *UI;
break;
}
// If there is no VCMP_rec node, or if the flag value has a single use,
// don't transform this.
if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
break;
// Look at the (necessarily single) use of the flag value. If it has a
// chain, this transformation is more complex. Note that multiple things
// could use the value result, which we should ignore.
SDNode *FlagUser = nullptr;
for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
FlagUser == nullptr; ++UI) {
assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
FlagUser = User;
break;
}
}
}
// If the user is a MFOCRF instruction, we know this is safe.
// Otherwise we give up for right now.
if (FlagUser->getOpcode() == PPCISD::MFOCRF)
return SDValue(VCMPrecNode, 0);
}
break;
case ISD::BR_CC: {
// If this is a branch on an altivec predicate comparison, lower this so
// that we don't have to do an MFOCRF: instead, branch directly on CR6. This
// lowering is done pre-legalize, because the legalizer lowers the predicate
// compare down to code that is difficult to reassemble.
// This code also handles branches that depend on the result of a store
// conditional.
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
int CompareOpc;
bool isDot;
if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
break;
// Since we are doing this pre-legalize, the RHS can be a constant of
// arbitrary bitwidth which may cause issues when trying to get the value
// from the underlying APInt.
auto RHSAPInt = RHS->getAsAPIntVal();
if (!RHSAPInt.isIntN(64))
break;
unsigned Val = RHSAPInt.getZExtValue();
auto isImpossibleCompare = [&]() {
// If this is a comparison against something other than 0/1, then we know
// that the condition is never/always true.
if (Val != 0 && Val != 1) {
if (CC == ISD::SETEQ) // Cond never true, remove branch.
return N->getOperand(0);
// Always !=, turn it into an unconditional branch.
return DAG.getNode(ISD::BR, dl, MVT::Other,
N->getOperand(0), N->getOperand(4));
}
return SDValue();
};
// Combine branches fed by store conditional instructions (st[bhwd]cx).
unsigned StoreWidth = 0;
if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
isStoreConditional(LHS, StoreWidth)) {
if (SDValue Impossible = isImpossibleCompare())
return Impossible;
PPC::Predicate CompOpc;
// eq 0 => ne
// ne 0 => eq
// eq 1 => eq
// ne 1 => ne
if (Val == 0)
CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
else
CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
DAG.getConstant(StoreWidth, dl, MVT::i32)};
auto *MemNode = cast<MemSDNode>(LHS);
SDValue ConstSt = DAG.getMemIntrinsicNode(
PPCISD::STORE_COND, dl,
DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
MemNode->getMemoryVT(), MemNode->getMemOperand());
SDValue InChain;
// Unchain the branch from the original store conditional.
if (N->getOperand(0) == LHS.getValue(1))
InChain = LHS.getOperand(0);
else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
SmallVector<SDValue, 4> InChains;
SDValue InTF = N->getOperand(0);
for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
if (InTF.getOperand(i) != LHS.getValue(1))
InChains.push_back(InTF.getOperand(i));
InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
}
return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
DAG.getConstant(CompOpc, dl, MVT::i32),
DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
ConstSt.getValue(2));
}
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
assert(isDot && "Can't compare against a vector result!");
if (SDValue Impossible = isImpossibleCompare())
return Impossible;
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
// Create the PPCISD altivec 'dot' comparison node.
SDValue Ops[] = {
LHS.getOperand(2), // LHS of compare
LHS.getOperand(3), // RHS of compare
DAG.getConstant(CompareOpc, dl, MVT::i32)
};
EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
// Unpack the result based on how the target uses it.
PPC::Predicate CompOpc;
switch (LHS.getConstantOperandVal(1)) {
default: // Can't happen, don't crash on invalid number though.
case 0: // Branch on the value of the EQ bit of CR6.
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
break;
case 1: // Branch on the inverted value of the EQ bit of CR6.
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
break;
case 2: // Branch on the value of the LT bit of CR6.
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
break;
case 3: // Branch on the inverted value of the LT bit of CR6.
CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
break;
}
return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
DAG.getConstant(CompOpc, dl, MVT::i32),
DAG.getRegister(PPC::CR6, MVT::i32),
N->getOperand(4), CompNode.getValue(1));
}
break;
}
case ISD::BUILD_VECTOR:
return DAGCombineBuildVector(N, DCI);
}
return SDValue();
}
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
// fold (sdiv X, pow2)
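// e.g. X sdiv 8 becomes (SRA_ADDZE X, 3), the sra[wd]i/addze idiom; for a
// negated power of two such as -8, the shifted result is negated below.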
EVT VT = N->getValueType(0);
if (VT == MVT::i64 && !Subtarget.isPPC64())
return SDValue();
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
bool IsNegPow2 = Divisor.isNegatedPowerOf2();
unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
Created.push_back(Op.getNode());
if (IsNegPow2) {
Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
Created.push_back(Op.getNode());
}
return Op;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
Known.resetAll();
switch (Op.getOpcode()) {
default: break;
case PPCISD::LBRX: {
// lhbrx is known to have the top bits cleared out.
if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
Known.Zero = 0xFFFF0000;
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
switch (Op.getConstantOperandVal(0)) {
default: break;
case Intrinsic::ppc_altivec_vcmpbfp_p:
case Intrinsic::ppc_altivec_vcmpeqfp_p:
case Intrinsic::ppc_altivec_vcmpequb_p:
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
case Intrinsic::ppc_altivec_vcmpequd_p:
case Intrinsic::ppc_altivec_vcmpequq_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
case Intrinsic::ppc_altivec_vcmpgtsd_p:
case Intrinsic::ppc_altivec_vcmpgtsq_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
case Intrinsic::ppc_altivec_vcmpgtud_p:
case Intrinsic::ppc_altivec_vcmpgtuq_p:
Known.Zero = ~1U; // All bits but the low one are known to be zero.
break;
}
break;
}
case ISD::INTRINSIC_W_CHAIN: {
switch (Op.getConstantOperandVal(1)) {
default:
break;
case Intrinsic::ppc_load2r:
// Top bits are cleared for load2r (which is the same as lhbrx).
Known.Zero = 0xFFFF0000;
break;
}
break;
}
}
}
Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
switch (Subtarget.getCPUDirective()) {
default: break;
case PPC::DIR_970:
case PPC::DIR_PWR4:
case PPC::DIR_PWR5:
case PPC::DIR_PWR5X:
case PPC::DIR_PWR6:
case PPC::DIR_PWR6X:
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
case PPC::DIR_PWR10:
case PPC::DIR_PWR11:
case PPC::DIR_PWR_FUTURE: {
if (!ML)
break;
if (!DisableInnermostLoopAlign32) {
// If the nested loop is an innermost loop, prefer a 32-byte alignment
// so that we can decrease cache misses and branch-prediction misses.
// Actual alignment of the loop will depend on the hotness check and other
// logic in alignBlocks.
if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
return Align(32);
}
const PPCInstrInfo *TII = Subtarget.getInstrInfo();
// For small loops (between 5 and 8 instructions), align to a 32-byte
// boundary so that the entire loop fits in one instruction-cache line.
uint64_t LoopSize = 0;
for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
for (const MachineInstr &J : **I) {
LoopSize += TII->getInstSizeInBytes(J);
if (LoopSize > 32)
break;
}
if (LoopSize > 16 && LoopSize <= 32)
return Align(32);
break;
}
}
return TargetLowering::getPrefLoopAlignment(ML);
}
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 'b':
case 'r':
case 'f':
case 'd':
case 'v':
case 'y':
return C_RegisterClass;
case 'Z':
// FIXME: While Z does indicate a memory constraint, it specifically
// indicates an r+r address (used in conjunction with the 'y' modifier
// in the replacement string). Currently, we're forcing the base
// register to be r0 in the asm printer (which is interpreted as zero)
// and forming the complete address in the second register. This is
// suboptimal.
return C_Memory;
}
} else if (Constraint == "wc") { // individual CR bits.
return C_RegisterClass;
} else if (Constraint == "wa" || Constraint == "wd" ||
Constraint == "wf" || Constraint == "ws" ||
Constraint == "wi" || Constraint == "ww") {
return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
return CW_Register; // an individual CR bit.
else if ((StringRef(constraint) == "wa" ||
StringRef(constraint) == "wd" ||
StringRef(constraint) == "wf") &&
type->isVectorTy())
return CW_Register;
else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
return CW_Register; // holds 64-bit integer data.
else if (StringRef(constraint) == "ws" && type->isDoubleTy())
return CW_Register;
else if (StringRef(constraint) == "ww" && type->isFloatTy())
return CW_Register;
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'b':
if (type->isIntegerTy())
weight = CW_Register;
break;
case 'f':
if (type->isFloatTy())
weight = CW_Register;
break;
case 'd':
if (type->isDoubleTy())
weight = CW_Register;
break;
case 'v':
if (type->isVectorTy())
weight = CW_Register;
break;
case 'y':
weight = CW_Register;
break;
case 'Z':
weight = CW_Memory;
break;
}
return weight;
}
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
return std::make_pair(0U, &PPC::GPRCRegClass);
// 'd' and 'f' constraints are both defined to be "the floating point
// registers", where one is for 32-bit and the other for 64-bit. We don't
// really care overly much here so just give them all the same reg classes.
case 'd':
case 'f':
if (Subtarget.hasSPE()) {
if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &PPC::GPRCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::SPERCRegClass);
} else {
if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &PPC::F4RCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::F8RCRegClass);
}
break;
case 'v':
if (Subtarget.hasAltivec() && VT.isVector())
return std::make_pair(0U, &PPC::VRRCRegClass);
else if (Subtarget.hasVSX())
// Scalars in Altivec registers only make sense with VSX.
return std::make_pair(0U, &PPC::VFRCRegClass);
break;
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
}
} else if (Constraint == "wc" && Subtarget.useCRBits()) {
// An individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass);
} else if ((Constraint == "wa" || Constraint == "wd" ||
Constraint == "wf" || Constraint == "wi") &&
Subtarget.hasVSX()) {
// A VSX register for either a scalar (FP) or vector. There is no
// support for single precision scalars on subtargets prior to Power8.
if (VT.isVector())
return std::make_pair(0U, &PPC::VSRCRegClass);
if (VT == MVT::f32 && Subtarget.hasP8Vector())
return std::make_pair(0U, &PPC::VSSRCRegClass);
return std::make_pair(0U, &PPC::VSFRCRegClass);
} else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
if (VT == MVT::f32 && Subtarget.hasP8Vector())
return std::make_pair(0U, &PPC::VSSRCRegClass);
else
return std::make_pair(0U, &PPC::VSFRCRegClass);
} else if (Constraint == "lr") {
if (VT == MVT::i64)
return std::make_pair(0U, &PPC::LR8RCRegClass);
else
return std::make_pair(0U, &PPC::LRRCRegClass);
}
// Handle special cases of physical registers that are not properly handled
// by the base class.
if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
// If we name a VSX register, we can't defer to the base class because it
// will not recognize the correct register (their names will be VSL{0-31}
// and V{0-31} so they won't match). So we match them here.
if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
int VSNum = atoi(Constraint.data() + 3);
assert(VSNum >= 0 && VSNum <= 63 &&
"Attempted to access a vsr out of range");
if (VSNum < 32)
return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
}
// For float registers, we can't defer to the base class as it will match
// the SPILLTOVSRRC class.
if (Constraint.size() > 3 && Constraint[1] == 'f') {
int RegNum = atoi(Constraint.data() + 2);
if (RegNum > 31 || RegNum < 0)
report_fatal_error("Invalid floating point register number");
if (VT == MVT::f32 || VT == MVT::i32)
return Subtarget.hasSPE()
? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
: std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return Subtarget.hasSPE()
? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
: std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
}
}
std::pair<unsigned, const TargetRegisterClass *> R =
TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
// (which we call X[0-9]+). If a 64-bit value has been requested, and a
// 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
// register.
// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first))
return std::make_pair(TRI->getMatchingSuperReg(R.first,
PPC::sub_32, &PPC::G8RCRegClass),
&PPC::G8RCRegClass);
// GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
R.first = PPC::CR0;
R.second = &PPC::CRRCRegClass;
}
// FIXME: This warning should ideally be emitted in the front end.
const auto &TM = getTargetMachine();
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
(R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
(R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
errs() << "warning: vector registers 20 to 32 are reserved in the "
"default AIX AltiVec ABI and cannot be used\n";
}
return R;
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
StringRef Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Only support length 1 constraints.
if (Constraint.size() > 1)
return;
char Letter = Constraint[0];
switch (Letter) {
default: break;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P': {
ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
if (!CST) return; // Must be an immediate to match.
SDLoc dl(Op);
int64_t Value = CST->getSExtValue();
EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
// numbers are printed as such.
switch (Letter) {
default: llvm_unreachable("Unknown constraint letter!");
case 'I': // "I" is a signed 16-bit constant.
if (isInt<16>(Value))
Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
if (isShiftedUInt<16, 16>(Value))
Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
if (isShiftedInt<16, 16>(Value))
Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
if (isUInt<16>(Value))
Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'M': // "M" is a constant that is greater than 31.
if (Value > 31)
Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'N': // "N" is a positive constant that is an exact power of two.
if (Value > 0 && isPowerOf2_64(Value))
Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'O': // "O" is the constant zero.
if (Value == 0)
Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
if (isInt<16>(-Value))
Result = DAG.getTargetConstant(Value, dl, TCVT);
break;
}
break;
}
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
// Handle standard constraint letters.
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
SmallVectorImpl<SDValue> &Ops,
SelectionDAG &DAG) const {
if (I.getNumOperands() <= 1)
return;
if (!isa<ConstantSDNode>(Ops[1].getNode()))
return;
auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
return;
if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
Ops.push_back(DAG.getMDNode(MDN));
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I) const {
// The vector-type r+i form is supported as the DQ form since Power9. We don't
// check the DQ-form offset requirement (off % 16 == 0) because on PowerPC the
// imm form is preferred, and the offset can be adjusted to use the imm form
// later in the PPCLoopInstrFormPrep pass. Also, in LSR each LSRUse checks
// legal addressing modes using its min and max offsets, so we should be a
// little aggressive and accept other offsets for that LSRUse.
if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
return false;
// PPC allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
return false;
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// PPC only supports r+r:
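// For illustration, AM = {HasBaseReg, Scale=1} (r+r) and
// AM = {HasBaseReg, BaseOffs=8} (r+8) are accepted below, while
// AM = {HasBaseReg, Scale=1, BaseOffs=8} (r+r+8) is rejected.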
switch (AM.Scale) {
case 0: // "r+i" or just "i", depending on HasBaseReg.
break;
case 1:
if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
return false;
// Otherwise we have r+r or r+i.
break;
case 2:
if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
return false;
// Allow 2*r as r+r.
break;
default:
// No other scales are supported.
return false;
}
return true;
}
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
SDLoc dl(Op);
unsigned Depth = Op.getConstantOperandVal(0);
// Make sure the function does not optimize away the store of the RA to
// the stack.
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
bool isPPC64 = Subtarget.isPPC64();
auto PtrVT = getPointerTy(MF.getDataLayout());
if (Depth > 0) {
// The link register (return address) is saved in the caller's frame,
// not the callee's stack frame. So we must get the caller's frame
// address and load the return address at the LR offset from there.
SDValue FrameAddr =
DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
SDValue Offset =
DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
isPPC64 ? MVT::i64 : MVT::i32);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
MachinePointerInfo());
}
// Just load the return address off the stack.
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
unsigned Depth = Op.getConstantOperandVal(0);
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT PtrVT = getPointerTy(MF.getDataLayout());
bool isPPC64 = PtrVT == MVT::i64;
// Naked functions never have a frame pointer, and so we use r1. For all
// other functions, this decision must be delayed until during PEI.
unsigned FrameReg;
if (MF.getFunction().hasFnAttribute(Attribute::Naked))
FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
else
FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)
FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
FrameAddr, MachinePointerInfo());
return FrameAddr;
}
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
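// For example, a GNU-style global register variable such as
//   register uintptr_t sp asm("r1");
// reaches this query with RegName == "r1".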
Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
bool isPPC64 = Subtarget.isPPC64();
bool is64Bit = isPPC64 && VT == LLT::scalar(64);
if (!is64Bit && VT != LLT::scalar(32))
report_fatal_error("Invalid register global variable type");
Register Reg = StringSwitch<Register>(RegName)
.Case("r1", is64Bit ? PPC::X1 : PPC::R1)
.Case("r2", isPPC64 ? Register() : PPC::R2)
.Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
.Default(Register());
if (Reg)
return Reg;
report_fatal_error("Invalid register name global variable");
}
bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
// The 32-bit SVR4 ABI accesses everything as got-indirect.
if (Subtarget.is32BitELFABI())
return true;
// AIX accesses everything indirectly through the TOC, which is similar to
// the GOT.
if (Subtarget.isAIXABI())
return true;
CodeModel::Model CModel = getTargetMachine().getCodeModel();
// If it is small or large code model, module locals are accessed
// indirectly by loading their address from .toc/.got.
if (CModel == CodeModel::Small || CModel == CodeModel::Large)
return true;
// JumpTable and BlockAddress are accessed as got-indirect.
if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
return true;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
return Subtarget.isGVIndirectSymbol(G->getGlobal());
return false;
}
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The PowerPC target isn't yet aware of offsets.
return false;
}
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
case Intrinsic::ppc_atomicrmw_xchg_i128:
case Intrinsic::ppc_atomicrmw_add_i128:
case Intrinsic::ppc_atomicrmw_sub_i128:
case Intrinsic::ppc_atomicrmw_nand_i128:
case Intrinsic::ppc_atomicrmw_and_i128:
case Intrinsic::ppc_atomicrmw_or_i128:
case Intrinsic::ppc_atomicrmw_xor_i128:
case Intrinsic::ppc_cmpxchg_i128:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
case Intrinsic::ppc_atomic_load_i128:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::ppc_atomic_store_i128:
Info.opc = ISD::INTRINSIC_VOID;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:
case Intrinsic::ppc_altivec_lvehx:
case Intrinsic::ppc_altivec_lvewx:
case Intrinsic::ppc_vsx_lxvd2x:
case Intrinsic::ppc_vsx_lxvw4x:
case Intrinsic::ppc_vsx_lxvd2x_be:
case Intrinsic::ppc_vsx_lxvw4x_be:
case Intrinsic::ppc_vsx_lxvl:
case Intrinsic::ppc_vsx_lxvll: {
EVT VT;
switch (Intrinsic) {
case Intrinsic::ppc_altivec_lvebx:
VT = MVT::i8;
break;
case Intrinsic::ppc_altivec_lvehx:
VT = MVT::i16;
break;
case Intrinsic::ppc_altivec_lvewx:
VT = MVT::i32;
break;
case Intrinsic::ppc_vsx_lxvd2x:
case Intrinsic::ppc_vsx_lxvd2x_be:
VT = MVT::v2f64;
break;
default:
VT = MVT::v4i32;
break;
}
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = VT;
Info.ptrVal = I.getArgOperand(0);
Info.offset = -VT.getStoreSize()+1;
Info.size = 2*VT.getStoreSize()-1;
Info.align = Align(1);
Info.flags = MachineMemOperand::MOLoad;
return true;
}
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_altivec_stvebx:
case Intrinsic::ppc_altivec_stvehx:
case Intrinsic::ppc_altivec_stvewx:
case Intrinsic::ppc_vsx_stxvd2x:
case Intrinsic::ppc_vsx_stxvw4x:
case Intrinsic::ppc_vsx_stxvd2x_be:
case Intrinsic::ppc_vsx_stxvw4x_be:
case Intrinsic::ppc_vsx_stxvl:
case Intrinsic::ppc_vsx_stxvll: {
EVT VT;
switch (Intrinsic) {
case Intrinsic::ppc_altivec_stvebx:
VT = MVT::i8;
break;
case Intrinsic::ppc_altivec_stvehx:
VT = MVT::i16;
break;
case Intrinsic::ppc_altivec_stvewx:
VT = MVT::i32;
break;
case Intrinsic::ppc_vsx_stxvd2x:
case Intrinsic::ppc_vsx_stxvd2x_be:
VT = MVT::v2f64;
break;
default:
VT = MVT::v4i32;
break;
}
Info.opc = ISD::INTRINSIC_VOID;
Info.memVT = VT;
Info.ptrVal = I.getArgOperand(1);
Info.offset = -VT.getStoreSize()+1;
Info.size = 2*VT.getStoreSize()-1;
Info.align = Align(1);
Info.flags = MachineMemOperand::MOStore;
return true;
}
case Intrinsic::ppc_stdcx:
case Intrinsic::ppc_stwcx:
case Intrinsic::ppc_sthcx:
case Intrinsic::ppc_stbcx: {
EVT VT;
auto Alignment = Align(8);
switch (Intrinsic) {
case Intrinsic::ppc_stdcx:
VT = MVT::i64;
break;
case Intrinsic::ppc_stwcx:
VT = MVT::i32;
Alignment = Align(4);
break;
case Intrinsic::ppc_sthcx:
VT = MVT::i16;
Alignment = Align(2);
break;
case Intrinsic::ppc_stbcx:
VT = MVT::i8;
Alignment = Align(1);
break;
}
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = VT;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Alignment;
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
default:
break;
}
return false;
}
/// getOptimalMemOpType - Returns the preferred type for memory-op lowering,
/// or EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
// We should use Altivec/VSX loads and stores when available. For unaligned
// addresses, unaligned VSX loads are only fast starting with the P8.
if (Subtarget.hasAltivec() && Op.size() >= 16) {
if (Op.isMemset() && Subtarget.hasVSX()) {
uint64_t TailSize = Op.size() % 16;
// For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
// element if the vector element type matches the tail store type. For tail
// sizes 3/4 the tail store is i32, so v4i32 cannot be used and a legal
// alternative (v8i16) is needed.
if (TailSize > 2 && TailSize <= 4) {
return MVT::v8i16;
}
return MVT::v4i32;
}
if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
return MVT::v4i32;
}
}
if (Subtarget.isPPC64()) {
return MVT::i64;
}
return MVT::i32;
}
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
return !(BitSize == 0 || BitSize > 64);
}
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
return NumBits1 == 64 && NumBits2 == 32;
}
bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 == 64 && NumBits2 == 32;
}
bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
// Generally speaking, zexts are not free, but they are free when they can be
// folded with other operations.
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
EVT MemVT = LD->getMemoryVT();
if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
(Subtarget.isPPC64() && MemVT == MVT::i32)) &&
(LD->getExtensionType() == ISD::NON_EXTLOAD ||
LD->getExtensionType() == ISD::ZEXTLOAD))
return true;
}
// FIXME: Add other cases...
// - 32-bit shifts with a zext to i64
// - zext after ctlz, bswap, etc.
// - zext after and by a constant mask
return TargetLowering::isZExtFree(Val, VT2);
}
bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"invalid fpext types");
// Extending to float128 is not free.
if (DestVT == MVT::f128)
return false;
return true;
}
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<16>(Imm) || isUInt<16>(Imm);
}
bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<16>(Imm) || isUInt<16>(Imm);
}
bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
MachineMemOperand::Flags,
unsigned *Fast) const {
if (DisablePPCUnaligned)
return false;
// PowerPC supports unaligned memory access for simple non-vector types.
// Although accessing unaligned addresses is not as efficient as accessing
// aligned addresses, it is generally more efficient than manual expansion,
// and it generally only traps to software emulation when crossing page
// boundaries.
if (!VT.isSimple())
return false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!Subtarget.allowsUnalignedFPAccess())
return false;
if (VT.getSimpleVT().isVector()) {
if (Subtarget.hasVSX()) {
if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
VT != MVT::v4f32 && VT != MVT::v4i32)
return false;
} else {
return false;
}
}
if (VT == MVT::ppcf128)
return false;
if (Fast)
*Fast = 1;
return true;
}
bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const {
// Check integral scalar types.
if (!VT.isScalarInteger())
return false;
if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
if (!ConstNode->getAPIntValue().isSignedIntN(64))
return false;
// This transformation will generate >= 2 operations. But the following
// cases will generate <= 2 instructions during ISEL, so exclude them:
// 1. If the constant multiplier fits in 16 bits, it can be handled by one
// HW instruction, i.e. MULLI.
// 2. If the multiplier fits in 16 bits after shifting, one extra shift
// instruction is needed over case 1, i.e. MULLI and RLDICR.
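// For example, Imm = 20 (5 << 2) shifts down to 5, which fits in 16 bits,
// so we return false (MULLI plus RLDICR suffices); whereas for
// Imm = (1 << 20) + 1, Imm - 1 is a power of 2, so we return true.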
int64_t Imm = ConstNode->getSExtValue();
unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
Imm >>= Shift;
if (isInt<16>(Imm))
return false;
uint64_t UImm = static_cast<uint64_t>(Imm);
if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
return true;
}
return false;
}
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
return isFMAFasterThanFMulAndFAdd(
MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
}
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
Type *Ty) const {
if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
return false;
switch (Ty->getScalarType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
return true;
case Type::FP128TyID:
return Subtarget.hasP9Vector();
default:
return false;
}
}
// FIXME: add more patterns which are not profitable to hoist.
bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
if (!I->hasOneUse())
return true;
Instruction *User = I->user_back();
assert(User && "A single use instruction with no uses.");
switch (I->getOpcode()) {
case Instruction::FMul: {
// Don't break FMA, PowerPC prefers FMA.
if (User->getOpcode() != Instruction::FSub &&
User->getOpcode() != Instruction::FAdd)
return true;
const TargetOptions &Options = getTargetMachine().Options;
const Function *F = I->getFunction();
const DataLayout &DL = F->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
return !(
isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
}
case Instruction::Load: {
// Don't break "store (load float*)" pattern, this pattern will be combined
// to "store (load int32)" in later InstCombine pass. See function
// combineLoadToOperationType. On PowerPC, loading a float point takes more
// cycles than loading a 32 bit integer.
LoadInst *LI = cast<LoadInst>(I);
// For the loads that combineLoadToOperationType does nothing, like
// ordered load, it should be profitable to hoist them.
// For swifterror load, it can only be used for pointer to pointer type, so
// later type check should get rid of this case.
if (!LI->isUnordered())
return true;
if (User->getOpcode() != Instruction::Store)
return true;
if (I->getType()->getTypeID() != Type::FloatTyID)
return true;
return false;
}
default:
return true;
}
return true;
}
const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
// site. Hence we include LR in the scratch registers, which are in turn added
// as implicit-defs for stackmaps and patchpoints. The same reasoning applies
// to CTR, which is used by any indirect call.
static const MCPhysReg ScratchRegs[] = {
PPC::X12, PPC::LR8, PPC::CTR8, 0
};
return ScratchRegs;
}
Register PPCTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}
Register PPCTargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}
bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT , unsigned DefinedValues) const {
if (VT == MVT::v2i64)
return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
if (Subtarget.hasVSX())
return true;
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
if (DisableILPPref || Subtarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
}
// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const {
return PPC::createFastISel(FuncInfo, LibInfo);
}
// 'Inverted' means the FMA opcode after negating one multiplicand.
// For example, (fma -a b c) = (fnmsub a b c)
static unsigned invertFMAOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Invalid FMA opcode for PowerPC!");
case ISD::FMA:
return PPCISD::FNMSUB;
case PPCISD::FNMSUB:
return ISD::FMA;
}
}
SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOps, bool OptForSize,
NegatibleCost &Cost,
unsigned Depth) const {
if (Depth > SelectionDAG::MaxRecursionDepth)
return SDValue();
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
SDNodeFlags Flags = Op.getNode()->getFlags();
switch (Opc) {
case PPCISD::FNMSUB:
if (!Op.hasOneUse() || !isTypeLegal(VT))
break;
const TargetOptions &Options = getTargetMachine().Options;
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
SDLoc Loc(Op);
NegatibleCost N2Cost = NegatibleCost::Expensive;
SDValue NegN2 =
getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
if (!NegN2)
return SDValue();
// (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
// (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
// These transformations may change sign of zeroes. For example,
// -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
// Try and choose the cheaper one to negate.
NegatibleCost N0Cost = NegatibleCost::Expensive;
SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
N0Cost, Depth + 1);
NegatibleCost N1Cost = NegatibleCost::Expensive;
SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
N1Cost, Depth + 1);
if (NegN0 && N0Cost <= N1Cost) {
Cost = std::min(N0Cost, N2Cost);
return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
} else if (NegN1) {
Cost = std::min(N1Cost, N2Cost);
return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
}
}
// (fneg (fnmsub a b c)) => (fma a b (fneg c))
if (isOperationLegal(ISD::FMA, VT)) {
Cost = N2Cost;
return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
}
break;
}
return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
Cost, Depth);
}
// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool PPCTargetLowering::useLoadStackGuardNode() const {
if (!Subtarget.isTargetLinux())
return TargetLowering::useLoadStackGuardNode();
return true;
}
// Override to disable global variable loading on Linux and insert AIX canary
// word declaration.
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
if (Subtarget.isAIXABI()) {
M.getOrInsertGlobal(AIXSSPCanaryWordName,
PointerType::getUnqual(M.getContext()));
return;
}
if (!Subtarget.isTargetLinux())
return TargetLowering::insertSSPDeclarations(M);
}
Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
if (Subtarget.isAIXABI())
return M.getGlobalVariable(AIXSSPCanaryWordName);
return TargetLowering::getSDagStackGuard(M);
}
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
if (!VT.isSimple() || !Subtarget.hasVSX())
return false;
switch(VT.getSimpleVT().SimpleTy) {
default:
// For FP types that are currently not supported by the PPC backend, return
// false. Examples: f16, f80.
return false;
case MVT::f32:
case MVT::f64: {
if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
// We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
return true;
}
bool IsExact;
APSInt IntResult(16, false);
// The rounding mode doesn't really matter because we only care about floats
// that can be converted to integers exactly.
Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
// For exact values in the range [-16, 15] we can materialize the float.
if (IsExact && IntResult <= 15 && IntResult >= -16)
return true;
return Imm.isZero();
}
case MVT::ppcf128:
return Imm.isPosZero();
}
}
// For vector shift operation op, fold
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
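// For example, (shl v4i32:x, (and v4i32:y, splat(31))) becomes
// (PPCISD::SHL x, y), since the target vector shift only uses the low
// bits of each shift amount anyway.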
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
unsigned Opcode = N->getOpcode();
unsigned TargetOpcode;
switch (Opcode) {
default:
llvm_unreachable("Unexpected shift operation");
case ISD::SHL:
TargetOpcode = PPCISD::SHL;
break;
case ISD::SRL:
TargetOpcode = PPCISD::SRL;
break;
case ISD::SRA:
TargetOpcode = PPCISD::SRA;
break;
}
if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
N1->getOpcode() == ISD::AND)
if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
if (Mask->getZExtValue() == OpSizeInBits - 1)
return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
return SDValue();
}
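// combineSHL: besides the modulo-on-shift fold above, fold a shift of a
// sign-extended i32 into PPCISD::EXTSWSLI on 64-bit ISA 3.0 targets,
// roughly (shl (sext i32:x), C) -> (EXTSWSLI x, C); see the guards below.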
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
return Value;
SDValue N0 = N->getOperand(0);
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
N0.getOpcode() != ISD::SIGN_EXTEND ||
N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
N->getValueType(0) != MVT::i64)
return SDValue();
// We can't save an operation here if the value is already extended, and
// the existing shift is easier to combine.
SDValue ExtsSrc = N0.getOperand(0);
if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
return SDValue();
SDLoc DL(N0);
SDValue ShiftBy = SDValue(CN1, 0);
// We want the shift amount to be i32 on the extswli, but the shift could
// have an i64.
if (ShiftBy.getValueType() == MVT::i64)
ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
ShiftBy);
}
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
return Value;
return SDValue();
}
SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
return Value;
return SDValue();
}
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) {
if (!Subtarget.isPPC64())
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
auto isZextOfCompareWithConstant = [](SDValue Op) {
if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
Op.getValueType() != MVT::i64)
return false;
SDValue Cmp = Op.getOperand(0);
if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
Cmp.getOperand(0).getValueType() != MVT::i64)
return false;
if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
int64_t NegConstant = 0 - Constant->getSExtValue();
// Due to the limitations of the addi instruction,
// -C is required to be [-32768, 32767].
return isInt<16>(NegConstant);
}
return false;
};
bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
// If there is a pattern, canonicalize a zext operand to the RHS.
if (LHSHasPattern && !RHSHasPattern)
std::swap(LHS, RHS);
else if (!LHSHasPattern && !RHSHasPattern)
return SDValue();
SDLoc DL(N);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
SDValue Cmp = RHS.getOperand(0);
SDValue Z = Cmp.getOperand(0);
auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
int64_t NegConstant = 0 - Constant->getSExtValue();
switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
default: break;
case ISD::SETNE: {
// when C == 0
// --> addze X, (addic Z, -1).carry
// /
// add X, (zext(setne Z, C))--
// \ when -32768 <= -C <= 32767 && C != 0
// --> addze X, (addic (addi Z, -C), -1).carry
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
DAG.getConstant(NegConstant, DL, MVT::i64));
SDValue AddOrZ = NegConstant != 0 ? Add : Z;
SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
SDValue(Addc.getNode(), 1));
}
case ISD::SETEQ: {
// when C == 0
// --> addze X, (subfic Z, 0).carry
// /
// add X, (zext(sete Z, C))--
// \ when -32768 <= -C <= 32767 && C != 0
// --> addze X, (subfic (addi Z, -C), 0).carry
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
DAG.getConstant(NegConstant, DL, MVT::i64));
SDValue AddOrZ = NegConstant != 0 ? Add : Z;
SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
DAG.getConstant(0, DL, MVT::i64), AddOrZ);
return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
SDValue(Subc.getNode(), 1));
}
}
return SDValue();
}
// Transform
// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
// In this case both C1 and C2 must be known constants.
// C1+C2 must fit into a 34-bit signed integer.
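// For example, (add 8, (MAT_PCREL_ADDR foo+4)) becomes
// (MAT_PCREL_ADDR foo+12).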
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) {
if (!Subtarget.isUsingPCRelativeCalls())
return SDValue();
// Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
// If we find that node try to cast the Global Address and the Constant.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
std::swap(LHS, RHS);
if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
return SDValue();
// Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
// Check that both casts succeeded.
if (!GSDN || !ConstNode)
return SDValue();
int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
SDLoc DL(GSDN);
// The signed int offset needs to fit in 34 bits.
if (!isInt<34>(NewOffset))
return SDValue();
// The new global address is a copy of the old global address except
// that it has the updated Offset.
SDValue GA =
DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
NewOffset, GSDN->getTargetFlags());
SDValue MatPCRel =
DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
return MatPCRel;
}
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
return Value;
if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
return Value;
return SDValue();
}
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128-bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
// of it.
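// For example, on little endian:
//   (trunc (bitcast f128:x to i128) to i64)
//     -> (extract_vector_elt (bitcast x to v2i64), 0)
//   (trunc (srl (bitcast f128:x to i128), 64) to i64)
//     -> (extract_vector_elt (bitcast x to v2i64), 1)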
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
DAGCombinerInfo &DCI) const {
// If we are using CRBits then try that first.
if (Subtarget.useCRBits()) {
// Check if CRBits did anything and return that if it did.
if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
return CRTruncValue;
}
SDLoc dl(N);
SDValue Op0 = N->getOperand(0);
// Looking for a truncate of i128 to i64.
if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
return SDValue();
int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
// SRL feeding TRUNCATE.
if (Op0.getOpcode() == ISD::SRL) {
ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
// The right shift has to be by 64 bits.
if (!ConstNode || ConstNode->getZExtValue() != 64)
return SDValue();
// Switch the element number to extract.
EltToExtract = EltToExtract ? 0 : 1;
// Update Op0 past the SRL.
Op0 = Op0.getOperand(0);
}
// BITCAST feeding a TRUNCATE possibly via SRL.
if (Op0.getOpcode() == ISD::BITCAST &&
Op0.getValueType() == MVT::i128 &&
Op0.getOperand(0).getValueType() == MVT::f128) {
SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
return DCI.DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
}
return SDValue();
}
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
if (!ConstOpOrElement)
return SDValue();
// An imul is usually smaller than the alternative sequence for legal type.
if (DAG.getMachineFunction().getFunction().hasMinSize() &&
isOperationLegal(ISD::MUL, N->getValueType(0)))
return SDValue();
auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
switch (this->Subtarget.getCPUDirective()) {
default:
// TODO: enhance the condition for subtarget before pwr8
return false;
case PPC::DIR_PWR8:
// type mul add shl
// scalar 4 1 1
// vector 7 2 2
return true;
case PPC::DIR_PWR9:
case PPC::DIR_PWR10:
case PPC::DIR_PWR11:
case PPC::DIR_PWR_FUTURE:
// type mul add shl
// scalar 5 2 2
// vector 7 2 2
// The cycle ratios of the related operations are shown in the table above.
// Because mul is 5 (scalar) / 7 (vector) while add/sub/shl are all 2 for
// both scalar and vector types, 2-instruction patterns (add/sub + shl,
// 4 cycles) are always profitable; but for 3-instruction patterns such as
// (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl is 6 cycles,
// so we should only do it for vector types.
return IsAddOne && IsNeg ? VT.isVector() : true;
}
};
EVT VT = N->getValueType(0);
SDLoc DL(N);
const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
bool IsNeg = MulAmt.isNegative();
APInt MulAmtAbs = MulAmt.abs();
if ((MulAmtAbs - 1).isPowerOf2()) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
// (mul x, -(2^N + 1)) => -(add (shl x, N), x)
if (!IsProfitable(IsNeg, true, VT))
return SDValue();
SDValue Op0 = N->getOperand(0);
SDValue Op1 =
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
if (!IsNeg)
return Res;
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
} else if ((MulAmtAbs + 1).isPowerOf2()) {
// (mul x, 2^N - 1) => (sub (shl x, N), x)
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
if (!IsProfitable(IsNeg, false, VT))
return SDValue();
SDValue Op0 = N->getOperand(0);
SDValue Op1 =
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
if (!IsNeg)
return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
else
return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
} else {
return SDValue();
}
}
// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
// in combiner since we need to check SD flags and other subtarget features.
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
DAGCombinerInfo &DCI) const {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
SDNodeFlags Flags = N->getFlags();
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
const TargetOptions &Options = getTargetMachine().Options;
unsigned Opc = N->getOpcode();
bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
bool LegalOps = !DCI.isBeforeLegalizeOps();
SDLoc Loc(N);
if (!isOperationLegal(ISD::FMA, VT))
return SDValue();
// Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
// since (fnmsub a b c)=-0 while c-ab=+0.
if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
return SDValue();
// (fma (fneg a) b c) => (fnmsub a b c)
// (fnmsub (fneg a) b c) => (fma a b c)
if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
// (fma a (fneg b) c) => (fnmsub a b c)
// (fnmsub a (fneg b) c) => (fma a b c)
if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
return SDValue();
}
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
if (!Subtarget.is64BitELFABI())
return false;
// If not a tail call then no need to proceed.
if (!CI->isTailCall())
return false;
// If sibling calls have been disabled and tail-calls aren't guaranteed,
// there is no reason to duplicate.
auto &TM = getTargetMachine();
if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
return false;
// Can't tail call a function called indirectly, or if it has variadic args.
const Function *Callee = CI->getCalledFunction();
if (!Callee || Callee->isVarArg())
return false;
// Make sure the callee and caller calling conventions are eligible for tco.
const Function *Caller = CI->getParent()->getParent();
if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
CI->getCallingConv()))
return false;
// If the function is local then we have a good chance at tail-calling it
return getTargetMachine().shouldAssumeDSOLocal(Callee);
}
bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
const Value *Mask = AndI.getOperand(1);
// If the mask is suitable for andi. or andis. we should sink the and.
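// For example, 0x0000FFFF (andi.) and 0xFFFF0000 (andis.) are suitable,
// while 0x1FFFF needs two instructions and is not.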
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
// Can't handle constants wider than 64-bits.
if (CI->getBitWidth() > 64)
return false;
int64_t ConstVal = CI->getZExtValue();
return isUInt<16>(ConstVal) ||
(isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
}
// For non-constant masks, we can always use the record-form and.
return true;
}
/// getAddrModeForFlags - Based on the set of address flags, select the
/// optimal instruction format to match.
PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
// This is not a node we should be handling here.
if (Flags == PPC::MOF_None)
return PPC::AM_None;
// Unaligned D-Forms are tried first, followed by the aligned D-Forms.
for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
if ((Flags & FlagSet) == FlagSet)
return PPC::AM_DForm;
for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
if ((Flags & FlagSet) == FlagSet)
return PPC::AM_DSForm;
for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
if ((Flags & FlagSet) == FlagSet)
return PPC::AM_DQForm;
for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
if ((Flags & FlagSet) == FlagSet)
return PPC::AM_PrefixDForm;
// If no other forms are selected, return an X-Form as it is the most
// general addressing mode.
return PPC::AM_XForm;
}
/// Set alignment flags based on whether or not the Frame Index is aligned.
/// Utilized when computing flags for address computation when selecting
/// load and store instructions.
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
SelectionDAG &DAG) {
bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
if (!FI)
return;
const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
// If this is (add $FI, $S16Imm), the alignment flags are already set
// based on the immediate. We just need to clear the alignment flags
// if the FI alignment is weaker.
if ((FrameIndexAlign % 4) != 0)
FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
if ((FrameIndexAlign % 16) != 0)
FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
// If the address is a plain FrameIndex, set alignment flags based on
// FI alignment.
if (!IsAdd) {
if ((FrameIndexAlign % 4) == 0)
FlagSet |= PPC::MOF_RPlusSImm16Mult4;
if ((FrameIndexAlign % 16) == 0)
FlagSet |= PPC::MOF_RPlusSImm16Mult16;
}
}
/// Given a node, compute flags that are used for address computation when
/// selecting load and store instructions. The flags computed are stored in
/// FlagSet. This function takes into account whether the node is a constant,
/// an ADD, an OR, or neither, and computes the address flags accordingly.
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
SelectionDAG &DAG) {
// Set the alignment flags for the node depending on if the node is
// 4-byte or 16-byte aligned.
auto SetAlignFlagsForImm = [&](uint64_t Imm) {
if ((Imm & 0x3) == 0)
FlagSet |= PPC::MOF_RPlusSImm16Mult4;
if ((Imm & 0xf) == 0)
FlagSet |= PPC::MOF_RPlusSImm16Mult16;
};
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
// All 32-bit constants can be computed as LIS + Disp.
const APInt &ConstImm = CN->getAPIntValue();
if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
FlagSet |= PPC::MOF_AddrIsSImm32;
SetAlignFlagsForImm(ConstImm.getZExtValue());
setAlignFlagsForFI(N, FlagSet, DAG);
}
if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
FlagSet |= PPC::MOF_RPlusSImm34;
else // Let constant materialization handle large constants.
FlagSet |= PPC::MOF_NotAddNorCst;
} else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
// This address can be represented as an addition of:
// - Register + Imm16 (possibly a multiple of 4/16)
// - Register + Imm34
// - Register + PPCISD::Lo
// - Register + Register
// In any case, we won't have to match this as Base + Zero.
SDValue RHS = N.getOperand(1);
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
const APInt &ConstImm = CN->getAPIntValue();
if (ConstImm.isSignedIntN(16)) {
FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
SetAlignFlagsForImm(ConstImm.getZExtValue());
setAlignFlagsForFI(N, FlagSet, DAG);
}
if (ConstImm.isSignedIntN(34))
FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
else
FlagSet |= PPC::MOF_RPlusR; // Register.
} else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
else
FlagSet |= PPC::MOF_RPlusR;
} else { // The address computation is not a constant or an addition.
setAlignFlagsForFI(N, FlagSet, DAG);
FlagSet |= PPC::MOF_NotAddNorCst;
}
}
static bool isPCRelNode(SDValue N) {
return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
isValidPCRelNode<ConstantPoolSDNode>(N) ||
isValidPCRelNode<GlobalAddressSDNode>(N) ||
isValidPCRelNode<JumpTableSDNode>(N) ||
isValidPCRelNode<BlockAddressSDNode>(N));
}
/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
SelectionDAG &DAG) const {
unsigned FlagSet = PPC::MOF_None;
// Compute subtarget flags.
if (!Subtarget.hasP9Vector())
FlagSet |= PPC::MOF_SubtargetBeforeP9;
else
FlagSet |= PPC::MOF_SubtargetP9;
if (Subtarget.hasPrefixInstrs())
FlagSet |= PPC::MOF_SubtargetP10;
if (Subtarget.hasSPE())
FlagSet |= PPC::MOF_SubtargetSPE;
// Check if we have a PCRel node and return early.
if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
return FlagSet;
// If the node is the paired load/store intrinsics, compute flags for
// address computation and return early.
unsigned ParentOp = Parent->getOpcode();
if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
(ParentOp == ISD::INTRINSIC_VOID))) {
unsigned ID = Parent->getConstantOperandVal(1);
if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
? Parent->getOperand(2)
: Parent->getOperand(3);
computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
FlagSet |= PPC::MOF_Vector;
return FlagSet;
}
}
// Mark this as something we don't want to handle here if it is atomic
// or pre-increment instruction.
if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
if (LSB->isIndexed())
return PPC::MOF_None;
// Compute in-memory type flags. This is based on if there are scalars,
// floats or vectors.
const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
assert(MN && "Parent should be a MemSDNode!");
EVT MemVT = MN->getMemoryVT();
unsigned Size = MemVT.getSizeInBits();
if (MemVT.isScalarInteger()) {
assert(Size <= 128 &&
"Not expecting scalar integers larger than 16 bytes!");
if (Size < 32)
FlagSet |= PPC::MOF_SubWordInt;
else if (Size == 32)
FlagSet |= PPC::MOF_WordInt;
else
FlagSet |= PPC::MOF_DoubleWordInt;
} else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
if (Size == 128)
FlagSet |= PPC::MOF_Vector;
else if (Size == 256) {
assert(Subtarget.pairedVectorMemops() &&
"256-bit vectors are only available when paired vector memops is "
"enabled!");
FlagSet |= PPC::MOF_Vector;
} else
llvm_unreachable("Not expecting illegal vectors!");
} else { // Floating point type: can be scalar, f128 or vector types.
if (Size == 32 || Size == 64)
FlagSet |= PPC::MOF_ScalarFloat;
else if (MemVT == MVT::f128 || MemVT.isVector())
FlagSet |= PPC::MOF_Vector;
else
llvm_unreachable("Not expecting illegal scalar floats!");
}
// Compute flags for address computation.
computeFlagsForAddressComputation(N, FlagSet, DAG);
// Compute type extension flags.
if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
switch (LN->getExtensionType()) {
case ISD::SEXTLOAD:
FlagSet |= PPC::MOF_SExt;
break;
case ISD::EXTLOAD:
case ISD::ZEXTLOAD:
FlagSet |= PPC::MOF_ZExt;
break;
case ISD::NON_EXTLOAD:
FlagSet |= PPC::MOF_NoExt;
break;
}
} else
FlagSet |= PPC::MOF_NoExt;
// For integers, no extension is the same as zero extension.
// We set the extension mode to zero extension so we don't have
// to add separate entries in AddrModesMap for loads and stores.
if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
FlagSet |= PPC::MOF_ZExt;
FlagSet &= ~PPC::MOF_NoExt;
}
// If we don't have prefixed instructions, 34-bit constants should be
// treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
bool IsNonP1034BitConst =
((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
FlagSet) == PPC::MOF_RPlusSImm34;
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
IsNonP1034BitConst)
FlagSet |= PPC::MOF_NotAddNorCst;
return FlagSet;
}
/// SelectForceXFormMode - Given the specified address, force it to be
/// represented as an indexed [r+r] operation (an XForm instruction).
PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG) const {
PPC::AddrMode Mode = PPC::AM_XForm;
int16_t ForceXFormImm = 0;
if (provablyDisjointOr(DAG, N) &&
!isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
Disp = N.getOperand(0);
Base = N.getOperand(1);
return Mode;
}
// If the address is the result of an add, we will utilize the fact that the
// address calculation includes an implicit add. However, we can reduce
// register pressure if we do not materialize a constant just for use as the
// index register. We only get rid of the add if it is not an add of a
// value and a 16-bit signed constant where both operands have a single use.
if (N.getOpcode() == ISD::ADD &&
(!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
!N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
Disp = N.getOperand(0);
Base = N.getOperand(1);
return Mode;
}
// Otherwise, use R0 as the base register.
Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Base = N;
return Mode;
}
bool PPCTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
EVT ValVT = Val.getValueType();
// If we are splitting a scalar integer into f64 parts (i.e. so they
// can be placed into VFRC registers), we need to zero extend and
// bitcast the values. This will ensure the value is placed into a
// VSR using direct moves or stack operations as needed.
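// For example, an i32 value V becomes (bitcast (zero_extend V to i64) to
// f64), leaving V in the low 32 bits of the f64 register part.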
if (PartVT == MVT::f64 &&
(ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
Parts[0] = Val;
return true;
}
return false;
}
SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
SelectionDAG &DAG) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::CallLoweringInfo CLI(DAG);
EVT RetVT = Op.getValueType();
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
SDValue Callee =
DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (const SDValue &N : Op->op_values()) {
EVT ArgVT = N.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = N;
Entry.Ty = ArgTy;
Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
Entry.IsZExt = !Entry.IsSExt;
Args.push_back(Entry);
}
SDValue InChain = DAG.getEntryNode();
SDValue TCChain = InChain;
const Function &F = DAG.getMachineFunction().getFunction();
bool isTailCall =
TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
(RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
if (isTailCall)
InChain = TCChain;
CLI.setDebugLoc(SDLoc(Op))
.setChain(InChain)
.setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
.setTailCall(isTailCall)
.setSExtResult(SignExtend)
.setZExtResult(!SignExtend)
.setIsPostTypeLegalization(true);
return TLI.LowerCallTo(CLI).first;
}
SDValue PPCTargetLowering::lowerLibCallBasedOnType(
const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::f32)
return lowerToLibCall(LibCallFloatName, Op, DAG);
if (Op.getValueType() == MVT::f64)
return lowerToLibCall(LibCallDoubleName, Op, DAG);
return SDValue();
}
bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
SDNodeFlags Flags = Op.getNode()->getFlags();
return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
Flags.hasNoNaNs() && Flags.hasNoInfs();
}
bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
return Op.getNode()->getFlags().hasApproximateFuncs();
}
bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
return getTargetMachine().Options.PPCGenScalarMASSEntries;
}
SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
const char *LibCallFloatName,
const char *LibCallDoubleNameFinite,
const char *LibCallFloatNameFinite,
SDValue Op,
SelectionDAG &DAG) const {
if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
return SDValue();
if (!isLowringToMASSFiniteSafe(Op))
return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
DAG);
return lowerLibCallBasedOnType(LibCallFloatNameFinite,
LibCallDoubleNameFinite, Op, DAG);
}
SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
"__xl_powf_finite", Op, DAG);
}
SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
"__xl_sinf_finite", Op, DAG);
}
SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
"__xl_cosf_finite", Op, DAG);
}
SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
"__xl_logf_finite", Op, DAG);
}
SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
"__xl_log10f_finite", Op, DAG);
}
SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
"__xl_expf_finite", Op, DAG);
}
// If we happen to match to an aligned D-Form, check if the Frame Index is
// adequately aligned. If it is not, reset the mode to match to X-Form.
static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
PPC::AddrMode &Mode) {
if (!isa<FrameIndexSDNode>(N))
return;
if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
(Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
Mode = PPC::AM_XForm;
}
/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode based
/// on the flags, and set the Base and Disp based on the address mode.
PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG,
MaybeAlign Align) const {
SDLoc DL(Parent);
// Compute the address flags.
unsigned Flags = computeMOFlags(Parent, N, DAG);
// Get the optimal address mode based on the Flags.
PPC::AddrMode Mode = getAddrModeForFlags(Flags);
// If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
// Select an X-Form load if it is not.
setXFormForUnalignedFI(N, Flags, Mode);
// Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
assert(Subtarget.isUsingPCRelativeCalls() &&
"Must be using PC-Relative calls when a valid PC-Relative node is "
"present!");
Mode = PPC::AM_PCRel;
}
// Set Base and Disp accordingly depending on the address mode.
switch (Mode) {
case PPC::AM_DForm:
case PPC::AM_DSForm:
case PPC::AM_DQForm: {
// This is a register plus a 16-bit immediate. The base will be the
// register and the displacement will be the immediate, unless the
// immediate isn't sufficiently aligned.
if (Flags & PPC::MOF_RPlusSImm16) {
SDValue Op0 = N.getOperand(0);
SDValue Op1 = N.getOperand(1);
int16_t Imm = Op1->getAsZExtVal();
if (!Align || isAligned(*Align, Imm)) {
Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
Base = Op0;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
}
break;
}
}
// This is a register plus the @lo relocation. The base is the register
// and the displacement is the global address.
else if (Flags & PPC::MOF_RPlusLo) {
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
Disp.getOpcode() == ISD::TargetConstantPool ||
Disp.getOpcode() == ISD::TargetJumpTable);
Base = N.getOperand(0);
break;
}
// This is a constant address at most 32 bits. The base will be
// zero or load-immediate-shifted and the displacement will be
// the low 16 bits of the address.
else if (Flags & PPC::MOF_AddrIsSImm32) {
auto *CN = cast<ConstantSDNode>(N);
EVT CNType = CN->getValueType(0);
uint64_t CNImm = CN->getZExtValue();
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0".
int16_t Imm;
if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
Disp = DAG.getTargetConstant(Imm, DL, CNType);
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CNType);
break;
}
// Handle 32-bit sext immediate with LIS + Addr mode.
if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
(!Align || isAligned(*Align, CNImm))) {
int32_t Addr = (int32_t)CNImm;
// Otherwise, break this down into LIS + Disp.
Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
Base =
DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
break;
}
}
// Otherwise, the PPC::MOF_NotAddNorCst flag is set. Load/Store is
// non-foldable.
Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else
Base = N;
break;
}
case PPC::AM_PrefixDForm: {
int64_t Imm34 = 0;
unsigned Opcode = N.getOpcode();
if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
(isIntS34Immediate(N.getOperand(1), Imm34))) {
// N is an ADD/OR node, and its second operand is a 34-bit signed immediate.
Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
else
Base = N.getOperand(0);
} else if (isIntS34Immediate(N, Imm34)) {
// The address is a 34-bit signed immediate.
Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
}
break;
}
case PPC::AM_PCRel: {
// When selecting PC-Relative instructions, "Base" is not utilized as
// we select the address as [PC+imm].
Disp = N;
break;
}
case PPC::AM_None:
break;
default: { // By default, X-Form is always available to be selected.
// When a frame index is not aligned, we also match by XForm.
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
Base = FI ? N : N.getOperand(1);
Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType())
: N.getOperand(0);
break;
}
}
return Mode;
}
CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
bool Return,
bool IsVarArg) const {
switch (CC) {
case CallingConv::Cold:
return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
default:
return CC_PPC64_ELF;
}
}
bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
}
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
switch (AI->getOperation()) {
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
return AtomicExpansionKind::CmpXChg;
default:
return TargetLowering::shouldExpandAtomicRMWInIR(AI);
}
}
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
}
static Intrinsic::ID
getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Xchg:
return Intrinsic::ppc_atomicrmw_xchg_i128;
case AtomicRMWInst::Add:
return Intrinsic::ppc_atomicrmw_add_i128;
case AtomicRMWInst::Sub:
return Intrinsic::ppc_atomicrmw_sub_i128;
case AtomicRMWInst::And:
return Intrinsic::ppc_atomicrmw_and_i128;
case AtomicRMWInst::Or:
return Intrinsic::ppc_atomicrmw_or_i128;
case AtomicRMWInst::Xor:
return Intrinsic::ppc_atomicrmw_xor_i128;
case AtomicRMWInst::Nand:
return Intrinsic::ppc_atomicrmw_nand_i128;
}
}
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = Incr->getType();
assert(ValTy->getPrimitiveSizeInBits() == 128);
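// The i128 increment is split into two i64 halves for the target
// intrinsic, and the {lo, hi} result pair is reassembled below; roughly:
//   %pair = call {i64, i64} @llvm.ppc.atomicrmw.<op>.i128(ptr, lo, hi)
//   %res = zext(lo) | (zext(hi) << 64)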
Function *RMW = Intrinsic::getDeclaration(
M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
Value *IncrHi =
Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = CmpVal->getType();
assert(ValTy->getPrimitiveSizeInBits() == 128);
Function *IntCmpXchg =
Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
Value *CmpHi =
Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
Value *NewHi =
Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
emitLeadingFence(Builder, CI, Ord);
Value *LoHi =
Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
emitTrailingFence(Builder, CI, Ord);
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index ba3b4bd701d6..6c0cbeadebf4 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1,3996 +1,3996 @@
//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISC-V implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "RISCVInstrInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
#define GEN_CHECK_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#include "RISCVGenInstrInfo.inc"
static cl::opt<bool> PreferWholeRegisterMove(
"riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
cl::desc("Prefer whole register move for vector registers."));
static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
"riscv-force-machine-combiner-strategy", cl::Hidden,
cl::desc("Force machine combiner to use a specific strategy for machine "
"trace metrics evaluation."),
cl::init(MachineTraceStrategy::TS_NumStrategies),
cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
"Local strategy."),
clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
"MinInstrCount strategy.")));
namespace llvm::RISCVVPseudosTable {
using namespace RISCV;
#define GET_RISCVVPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCVVPseudosTable
namespace llvm::RISCV {
#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // end namespace llvm::RISCV
RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
: RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
STI(STI) {}
MCInst RISCVInstrInfo::getNop() const {
if (STI.hasStdExtCOrZca())
return MCInstBuilder(RISCV::C_NOP);
return MCInstBuilder(RISCV::ADDI)
.addReg(RISCV::X0)
.addReg(RISCV::X0)
.addImm(0);
}
Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
unsigned Dummy;
return isLoadFromStackSlot(MI, FrameIndex, Dummy);
}
Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex,
unsigned &MemBytes) const {
switch (MI.getOpcode()) {
default:
return 0;
case RISCV::LB:
case RISCV::LBU:
MemBytes = 1;
break;
case RISCV::LH:
case RISCV::LHU:
case RISCV::FLH:
MemBytes = 2;
break;
case RISCV::LW:
case RISCV::FLW:
case RISCV::LWU:
MemBytes = 4;
break;
case RISCV::LD:
case RISCV::FLD:
MemBytes = 8;
break;
}
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
return 0;
}
Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
unsigned Dummy;
return isStoreToStackSlot(MI, FrameIndex, Dummy);
}
Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex,
unsigned &MemBytes) const {
switch (MI.getOpcode()) {
default:
return 0;
case RISCV::SB:
MemBytes = 1;
break;
case RISCV::SH:
case RISCV::FSH:
MemBytes = 2;
break;
case RISCV::SW:
case RISCV::FSW:
MemBytes = 4;
break;
case RISCV::SD:
case RISCV::FSD:
MemBytes = 8;
break;
}
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
return 0;
}
bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
const MachineInstr &MI) const {
if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VID_V &&
MI.getOperand(1).isUndef() &&
/* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl and
vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
i.e. -riscv-vsetvl-after-rvv-regalloc=true */
!MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
return true;
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
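// Returns true if copying the tuple [SrcReg, SrcReg + NumRegs) to
// [DstReg, DstReg + NumRegs) in ascending order would overwrite a source
// register before it is read, e.g. copying v1-v2 into v2-v3 clobbers v2
// on the first sub-copy.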
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
unsigned NumRegs) {
return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
}
static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
const MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator MBBI,
MachineBasicBlock::const_iterator &DefMBBI,
RISCVII::VLMUL LMul) {
if (PreferWholeRegisterMove)
return false;
assert(MBBI->getOpcode() == TargetOpcode::COPY &&
"Unexpected COPY instruction.");
Register SrcReg = MBBI->getOperand(1).getReg();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
bool FoundDef = false;
bool FirstVSetVLI = false;
unsigned FirstSEW = 0;
while (MBBI != MBB.begin()) {
--MBBI;
if (MBBI->isMetaInstruction())
continue;
if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
// There is a vsetvli between COPY and source define instruction.
// vy = def_vop ... (producing instruction)
// ...
// vsetvli
// ...
// vx = COPY vy
if (!FoundDef) {
if (!FirstVSetVLI) {
FirstVSetVLI = true;
unsigned FirstVType = MBBI->getOperand(2).getImm();
RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
FirstSEW = RISCVVType::getSEW(FirstVType);
// The first encountered vsetvli must have the same lmul as the
// register class of COPY.
if (FirstLMul != LMul)
return false;
}
// Only permit `vsetvli x0, x0, vtype` between COPY and the source
// define instruction.
if (MBBI->getOperand(0).getReg() != RISCV::X0)
return false;
if (MBBI->getOperand(1).isImm())
return false;
if (MBBI->getOperand(1).getReg() != RISCV::X0)
return false;
continue;
}
// MBBI is the first vsetvli before the producing instruction.
unsigned VType = MBBI->getOperand(2).getImm();
// If there is a vsetvli between COPY and the producing instruction.
if (FirstVSetVLI) {
// If SEW is different, return false.
if (RISCVVType::getSEW(VType) != FirstSEW)
return false;
}
// If the vsetvli is tail undisturbed, keep the whole register move.
if (!RISCVVType::isTailAgnostic(VType))
return false;
// The check is conservative. We only have register classes for
// LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
// for fractional LMUL operations. However, we cannot use the vsetvli
// LMUL for widening operations, since the result of a widening
// operation is 2 x LMUL.
return LMul == RISCVVType::getVLMUL(VType);
} else if (MBBI->isInlineAsm() || MBBI->isCall()) {
return false;
} else if (MBBI->getNumDefs()) {
// Check all the instructions which will change VL.
// For example, vleff has implicit def VL.
if (MBBI->modifiesRegister(RISCV::VL, /*TRI=*/nullptr))
return false;
// Only convert whole register copies to vmv.v.v when the defining
// value appears in the explicit operands.
for (const MachineOperand &MO : MBBI->explicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
// We only permit the source of the COPY to have the same LMUL as the
// defined operand.
// There are cases we need to keep the whole register copy if the LMUL
// is different.
// For example,
// $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,mu
// $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
// # The COPY may be created by vlmul_trunc intrinsic.
// $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
//
// After widening, the valid value will be 4 x e32 elements. If we
// convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
// FIXME: The COPY of subregister of Zvlsseg register will not be able
// to convert to vmv.v.[v|i] under the constraint.
if (MO.getReg() != SrcReg)
return false;
// For widening reduction instructions with an LMUL_1 input vector,
// checking only the LMUL is insufficient because the reduction result
// is always LMUL_1.
// For example,
// $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
// $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
// $v26 = COPY killed renamable $v8
// After widening, the valid value will be 1 x e16 elements. If we
// convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
uint64_t TSFlags = MBBI->getDesc().TSFlags;
if (RISCVII::isRVVWideningReduction(TSFlags))
return false;
// If the producing instruction does not depend on vsetvli, do not
// convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
return false;
// Found the definition.
FoundDef = true;
DefMBBI = MBBI;
break;
}
}
}
}
return false;
}
void RISCVInstrInfo::copyPhysRegVector(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
const TargetRegisterClass *RegClass) const {
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
RISCVII::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
unsigned NF = RISCVRI::getNF(RegClass->TSFlags);
uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
uint16_t DstEncoding = TRI->getEncodingValue(DstReg);
auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(LMul);
assert(!Fractional && "It is impossible to have a fractional LMUL here.");
unsigned NumRegs = NF * LMulVal;
bool ReversedCopy =
forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
if (ReversedCopy) {
// If the src and dest overlap when copying a tuple, we need to copy the
// registers in reverse.
SrcEncoding += NumRegs - 1;
DstEncoding += NumRegs - 1;
}
unsigned I = 0;
auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
-> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,
unsigned, unsigned> {
if (ReversedCopy) {
// For reversed copying, if there are enough aligned registers (8/4/2), we
// can do a larger copy (LMUL8/4/2).
// Besides, we already know that DstEncoding is larger than SrcEncoding in
// forwardCopyWillClobberTuple, so the difference between DstEncoding and
// SrcEncoding should be at least the LMUL value we try to use, to avoid
// clobbering.
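// For example, copying the 8-register group v0-v7 to v4-v11 is reversed;
// after the end-adjustment SrcEncoding = 7 and DstEncoding = 11, so this
// emits two LMUL4 copies: v8m4 <- v4m4, then v4m4 <- v0m4.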
uint16_t Diff = DstEncoding - SrcEncoding;
if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
DstEncoding % 8 == 7)
return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
DstEncoding % 4 == 3)
return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
DstEncoding % 2 == 1)
return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
// Otherwise, we do an LMUL1 copy.
return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
}
// For forward copying, if the source and destination register encodings
// are aligned to 8/4/2, we can do an LMUL8/4/2 copy.
if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
// Otherwise, we do an LMUL1 copy.
return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
};
auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
uint16_t Encoding) {
MCRegister Reg = RISCV::V0 + Encoding;
if (&RegClass == &RISCV::VRRegClass)
return Reg;
return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
};
while (I != NumRegs) {
// For non-segment copying, we only do this once, as the registers are
// always aligned.
// For segment copying, we may do this several times. If the registers are
// aligned to a larger LMUL, we can eliminate some copies.
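// For example, a VRN2M4 tuple (2 x LMUL4 = 8 registers) whose source and
// destination encodings are 8-aligned is copied with a single VMV8R_V.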
auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
GetCopyInfo(SrcEncoding, DstEncoding);
auto [NumCopied, _] = RISCVVType::decodeVLMUL(LMulCopied);
MachineBasicBlock::const_iterator DefMBBI;
if (LMul == LMulCopied &&
isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
Opc = VVOpc;
if (DefMBBI->getOpcode() == VIOpc)
Opc = VIOpc;
}
// Emit actual copying.
// For reversed copying, the encoding should be decreased.
MCRegister ActualSrcReg = FindRegWithEncoding(
RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
MCRegister ActualDstReg = FindRegWithEncoding(
RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
if (UseVMV)
MIB.addReg(ActualDstReg, RegState::Undef);
if (UseVMV_V_I)
MIB = MIB.add(DefMBBI->getOperand(2));
else
MIB = MIB.addReg(ActualSrcReg, getKillRegState(KillSrc));
if (UseVMV) {
const MCInstrDesc &Desc = DefMBBI->getDesc();
MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
MIB.addImm(0); // tu, mu
MIB.addReg(RISCV::VL, RegState::Implicit);
MIB.addReg(RISCV::VTYPE, RegState::Implicit);
}
// If we are copying in reverse, we should decrease the encodings.
SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
I += NumCopied;
}
}
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg,
MCRegister SrcReg, bool KillSrc) const {
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0);
return;
}
if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
// Emit an ADDI for both parts of GPRPair.
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
.addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
getKillRegState(KillSrc))
.addImm(0);
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
.addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
getKillRegState(KillSrc))
.addImm(0);
return;
}
// Handle copy from csr
if (RISCV::VCSRRegClass.contains(SrcReg) &&
RISCV::GPRRegClass.contains(DstReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
.addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
.addReg(RISCV::X0);
return;
}
if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
unsigned Opc;
if (STI.hasStdExtZfh()) {
Opc = RISCV::FSGNJ_H;
} else {
assert(STI.hasStdExtF() &&
(STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
"Unexpected extensions");
// Zfhmin/Zfbfmin don't have FSGNJ_H, so replace FSGNJ_H with FSGNJ_S.
DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
&RISCV::FPR32RegClass);
SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
&RISCV::FPR32RegClass);
Opc = RISCV::FSGNJ_S;
}
BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (RISCV::FPR32RegClass.contains(DstReg) &&
RISCV::GPRRegClass.contains(SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (RISCV::GPRRegClass.contains(DstReg) &&
RISCV::FPR32RegClass.contains(SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (RISCV::FPR64RegClass.contains(DstReg) &&
RISCV::GPRRegClass.contains(SrcReg)) {
assert(STI.getXLen() == 64 && "Unexpected GPR size");
BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (RISCV::GPRRegClass.contains(DstReg) &&
RISCV::FPR64RegClass.contains(SrcReg)) {
assert(STI.getXLen() == 64 && "Unexpected GPR size");
BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
// VR->VR copies.
static const TargetRegisterClass *RVVRegClasses[] = {
&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
&RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN2M2RegClass,
&RISCV::VRN2M4RegClass, &RISCV::VRN3M1RegClass, &RISCV::VRN3M2RegClass,
&RISCV::VRN4M1RegClass, &RISCV::VRN4M2RegClass, &RISCV::VRN5M1RegClass,
&RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass};
for (const auto &RegClass : RVVRegClasses) {
if (RegClass->contains(DstReg, SrcReg)) {
copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
return;
}
}
llvm_unreachable("Impossible reg-to-reg copy");
}
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register SrcReg, bool IsKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
Register VReg) const {
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
bool IsScalableVector = true;
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::SW : RISCV::SD;
IsScalableVector = false;
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxSD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSH;
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSW;
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSD;
IsScalableVector = false;
} else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VS1R_V;
} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VS2R_V;
} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VS4R_V;
} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VS8R_V;
} else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL2_M1;
else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL2_M2;
else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL2_M4;
else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL3_M1;
else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL3_M2;
else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL4_M1;
else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL4_M2;
else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL5_M1;
else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL6_M1;
else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL7_M1;
else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVSPILL8_M1;
else
llvm_unreachable("Can't store this register to stack slot");
if (IsScalableVector) {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
BuildMI(MBB, I, DebugLoc(), get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addMemOperand(MMO);
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
BuildMI(MBB, I, DebugLoc(), get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
}
void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register DstReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
Register VReg) const {
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
bool IsScalableVector = true;
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::LW : RISCV::LD;
IsScalableVector = false;
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxLD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLH;
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLW;
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLD;
IsScalableVector = false;
} else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VL1RE8_V;
} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VL2RE8_V;
} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VL4RE8_V;
} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VL8RE8_V;
} else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD2_M1;
else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD2_M2;
else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD2_M4;
else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD3_M1;
else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD3_M2;
else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD4_M1;
else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD4_M2;
else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD5_M1;
else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD6_M1;
else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD7_M1;
else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
Opcode = RISCV::PseudoVRELOAD8_M1;
else
llvm_unreachable("Can't load this register from stack slot");
if (IsScalableVector) {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
.addFrameIndex(FI)
.addMemOperand(MMO);
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
}
MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
VirtRegMap *VRM) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
// The below optimizations narrow the load so they are only valid for little
// endian.
// TODO: Support big endian by adding an offset into the frame object?
if (MF.getDataLayout().isBigEndian())
return nullptr;
// Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
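// For example, a reload from a stack slot feeding a sext.h can be folded
// into a single LH from the same frame index.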
if (Ops.size() != 1 || Ops[0] != 1)
return nullptr;
unsigned LoadOpc;
switch (MI.getOpcode()) {
default:
if (RISCV::isSEXT_W(MI)) {
LoadOpc = RISCV::LW;
break;
}
if (RISCV::isZEXT_W(MI)) {
LoadOpc = RISCV::LWU;
break;
}
if (RISCV::isZEXT_B(MI)) {
LoadOpc = RISCV::LBU;
break;
}
return nullptr;
case RISCV::SEXT_H:
LoadOpc = RISCV::LH;
break;
case RISCV::SEXT_B:
LoadOpc = RISCV::LB;
break;
case RISCV::ZEXT_H_RV32:
case RISCV::ZEXT_H_RV64:
LoadOpc = RISCV::LHU;
break;
}
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIndex),
MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
MFI.getObjectAlign(FrameIndex));
Register DstReg = MI.getOperand(0).getReg();
return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
DstReg)
.addFrameIndex(FrameIndex)
.addImm(0)
.addMemOperand(MMO);
}
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
MachineInstr::MIFlag Flag, bool DstRenamable,
bool DstIsDead) const {
Register SrcReg = RISCV::X0;
// For RV32, allow a signed or unsigned 32-bit value.
if (!STI.is64Bit() && !isInt<32>(Val)) {
// If have a uimm32 it will still fit in a register so we can allow it.
if (!isUInt<32>(Val))
report_fatal_error("Should only materialize 32-bit constants for RV32");
// Sign extend for generateInstSeq.
Val = SignExtend64<32>(Val);
}
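// For example, on RV32 a Val of 0x12345678 typically expands to
// LUI 0x12345 followed by ADDI 0x678.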
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
assert(!Seq.empty());
bool SrcRenamable = false;
unsigned Num = 0;
for (const RISCVMatInt::Inst &Inst : Seq) {
bool LastItem = ++Num == Seq.size();
unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
getRenamableRegState(DstRenamable);
unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
getRenamableRegState(SrcRenamable);
switch (Inst.getOpndKind()) {
case RISCVMatInt::Imm:
BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
.addReg(DstReg, RegState::Define | DstRegState)
.addImm(Inst.getImm())
.setMIFlag(Flag);
break;
case RISCVMatInt::RegX0:
BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
.addReg(DstReg, RegState::Define | DstRegState)
.addReg(SrcReg, SrcRegState)
.addReg(RISCV::X0)
.setMIFlag(Flag);
break;
case RISCVMatInt::RegReg:
BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
.addReg(DstReg, RegState::Define | DstRegState)
.addReg(SrcReg, SrcRegState)
.addReg(SrcReg, SrcRegState)
.setMIFlag(Flag);
break;
case RISCVMatInt::RegImm:
BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
.addReg(DstReg, RegState::Define | DstRegState)
.addReg(SrcReg, SrcRegState)
.addImm(Inst.getImm())
.setMIFlag(Flag);
break;
}
// Only the first instruction has X0 as its source.
SrcReg = DstReg;
SrcRenamable = DstRenamable;
}
}
static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
switch (Opc) {
default:
return RISCVCC::COND_INVALID;
case RISCV::CV_BEQIMM:
return RISCVCC::COND_EQ;
case RISCV::CV_BNEIMM:
return RISCVCC::COND_NE;
case RISCV::BEQ:
return RISCVCC::COND_EQ;
case RISCV::BNE:
return RISCVCC::COND_NE;
case RISCV::BLT:
return RISCVCC::COND_LT;
case RISCV::BGE:
return RISCVCC::COND_GE;
case RISCV::BLTU:
return RISCVCC::COND_LTU;
case RISCV::BGEU:
return RISCVCC::COND_GEU;
}
}
// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISC-V,
// we push the condition code and the branch's two source operands.
static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
SmallVectorImpl<MachineOperand> &Cond) {
// Block ends with fall-through condbranch.
assert(LastInst.getDesc().isConditionalBranch() &&
"Unknown conditional branch");
Target = LastInst.getOperand(2).getMBB();
unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
Cond.push_back(MachineOperand::CreateImm(CC));
Cond.push_back(LastInst.getOperand(0));
Cond.push_back(LastInst.getOperand(1));
}
unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, bool Imm) {
switch (CC) {
default:
llvm_unreachable("Unknown condition code!");
case RISCVCC::COND_EQ:
return Imm ? RISCV::CV_BEQIMM : RISCV::BEQ;
case RISCVCC::COND_NE:
return Imm ? RISCV::CV_BNEIMM : RISCV::BNE;
case RISCVCC::COND_LT:
return RISCV::BLT;
case RISCVCC::COND_GE:
return RISCV::BGE;
case RISCVCC::COND_LTU:
return RISCV::BLTU;
case RISCVCC::COND_GEU:
return RISCV::BGEU;
}
}
const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC,
bool Imm) const {
return get(RISCVCC::getBrCond(CC, Imm));
}
RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unrecognized conditional branch");
case RISCVCC::COND_EQ:
return RISCVCC::COND_NE;
case RISCVCC::COND_NE:
return RISCVCC::COND_EQ;
case RISCVCC::COND_LT:
return RISCVCC::COND_GE;
case RISCVCC::COND_GE:
return RISCVCC::COND_LT;
case RISCVCC::COND_LTU:
return RISCVCC::COND_GEU;
case RISCVCC::COND_GEU:
return RISCVCC::COND_LTU;
}
}
bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
TBB = FBB = nullptr;
Cond.clear();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end() || !isUnpredicatedTerminator(*I))
return false;
// Count the number of terminators and find the first unconditional or
// indirect branch.
MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
int NumTerminators = 0;
for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
J++) {
NumTerminators++;
if (J->getDesc().isUnconditionalBranch() ||
J->getDesc().isIndirectBranch()) {
FirstUncondOrIndirectBr = J.getReverse();
}
}
// If AllowModify is true, we can erase any terminators after
// FirstUncondOrIndirectBr.
if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
std::next(FirstUncondOrIndirectBr)->eraseFromParent();
NumTerminators--;
}
I = FirstUncondOrIndirectBr;
}
// We can't handle blocks that end in an indirect branch.
if (I->getDesc().isIndirectBranch())
return true;
// We can't handle Generic branch opcodes from Global ISel.
if (I->isPreISelOpcode())
return true;
// We can't handle blocks with more than 2 terminators.
if (NumTerminators > 2)
return true;
// Handle a single unconditional branch.
if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
TBB = getBranchDestBlock(*I);
return false;
}
// Handle a single conditional branch.
if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
parseCondBranch(*I, TBB, Cond);
return false;
}
// Handle a conditional branch followed by an unconditional branch.
if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
I->getDesc().isUnconditionalBranch()) {
parseCondBranch(*std::prev(I), TBB, Cond);
FBB = getBranchDestBlock(*I);
return false;
}
// Otherwise, we can't handle this.
return true;
}
unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
if (BytesRemoved)
*BytesRemoved = 0;
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
if (!I->getDesc().isUnconditionalBranch() &&
!I->getDesc().isConditionalBranch())
return 0;
// Remove the branch.
if (BytesRemoved)
*BytesRemoved += getInstSizeInBytes(*I);
I->eraseFromParent();
I = MBB.end();
if (I == MBB.begin())
return 1;
--I;
if (!I->getDesc().isConditionalBranch())
return 1;
// Remove the branch.
if (BytesRemoved)
*BytesRemoved += getInstSizeInBytes(*I);
I->eraseFromParent();
return 2;
}
// Inserts a branch into the end of the specified MachineBasicBlock, returning
// the number of instructions inserted.
unsigned RISCVInstrInfo::insertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
if (BytesAdded)
*BytesAdded = 0;
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 3 || Cond.size() == 0) &&
"RISC-V branch conditions have three components!");
// Unconditional branch.
if (Cond.empty()) {
MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(MI);
return 1;
}
// Either a one or two-way conditional branch.
auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
MachineInstr &CondMI = *BuildMI(&MBB, DL, getBrCond(CC, Cond[2].isImm()))
.add(Cond[1])
.add(Cond[2])
.addMBB(TBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(CondMI);
// One-way conditional branch.
if (!FBB)
return 1;
// Two-way conditional branch.
MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(MI);
return 2;
}
void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
MachineBasicBlock &DestBB,
MachineBasicBlock &RestoreBB,
const DebugLoc &DL, int64_t BrOffset,
RegScavenger *RS) const {
assert(RS && "RegScavenger required for long branching");
assert(MBB.empty() &&
"new block should be inserted for expanding unconditional branch");
assert(MBB.pred_size() == 1);
assert(RestoreBB.empty() &&
"restore block should be inserted for restoring clobbered registers");
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
if (!isInt<32>(BrOffset))
report_fatal_error(
"Branch offsets outside of the signed 32-bit range not supported");
// FIXME: A virtual register must be used initially, as the register
// scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
// uses the same workaround).
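// PseudoJump later expands to an AUIPC + JALR pair through ScratchReg,
// which is why the branch offset must fit in 32 bits.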
Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRJALRRegClass);
auto II = MBB.end();
// We may also update the jump target to RestoreBB later.
MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
.addReg(ScratchReg, RegState::Define | RegState::Dead)
.addMBB(&DestBB, RISCVII::MO_CALL);
RS->enterBasicBlockEnd(MBB);
Register TmpGPR =
RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
/*RestoreAfter=*/false, /*SpAdj=*/0,
/*AllowSpill=*/false);
if (TmpGPR != RISCV::NoRegister)
RS->setRegUsed(TmpGPR);
else {
// The case when there is no scavenged register needs special handling.
// Pick s11 because it doesn't make a difference.
TmpGPR = RISCV::X27;
int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
if (FrameIndex == -1)
report_fatal_error("underestimated function size");
storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
&RISCV::GPRRegClass, TRI, Register());
TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
/*SpAdj=*/0, /*FIOperandNum=*/1);
MI.getOperand(1).setMBB(&RestoreBB);
loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
&RISCV::GPRRegClass, TRI, Register());
TRI->eliminateFrameIndex(RestoreBB.back(),
/*SpAdj=*/0, /*FIOperandNum=*/1);
}
MRI.replaceRegWith(ScratchReg, TmpGPR);
MRI.clearVirtRegs();
}
bool RISCVInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert((Cond.size() == 3) && "Invalid branch condition!");
auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
Cond[0].setImm(getOppositeBranchCondition(CC));
return false;
}
bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
MachineBasicBlock *MBB = MI.getParent();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
MachineBasicBlock *TBB, *FBB;
SmallVector<MachineOperand, 3> Cond;
if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
return false;
RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
assert(CC != RISCVCC::COND_INVALID);
if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
return false;
// For two constants C0 and C1 from
// ```
// li Y, C0
// li Z, C1
// ```
// 1. if C1 = C0 + 1
// we can turn:
// (a) blt Y, X -> bge X, Z
// (b) bge Y, X -> blt X, Z
//
// 2. if C1 = C0 - 1
// we can turn:
// (a) blt X, Y -> bge Z, X
// (b) bge X, Y -> blt Z, X
//
// To make sure this optimization is really beneficial, we only
// optimize for cases where Y had only one use (i.e. only used by the branch).
// Right now we only care about LI (i.e. ADDI x0, imm)
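// For a concrete instance of 1(a): given an existing `li z, 6`, the pair
// `li y, 5; blt y, x` (x > 5) can be rewritten as `bge x, z` (x >= 6).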
auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
MI->getOperand(1).getReg() == RISCV::X0) {
Imm = MI->getOperand(2).getImm();
return true;
}
return false;
};
// Either a load from immediate instruction or X0.
auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
if (!Op.isReg())
return false;
Register Reg = Op.getReg();
return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
};
MachineOperand &LHS = MI.getOperand(0);
MachineOperand &RHS = MI.getOperand(1);
// Try to find the register for constant Z; return
// invalid register otherwise.
auto searchConst = [&](int64_t C1) -> Register {
MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
int64_t Imm;
return isLoadImm(&I, Imm) && Imm == C1 &&
I.getOperand(0).getReg().isVirtual();
});
if (DefC1 != E)
return DefC1->getOperand(0).getReg();
return Register();
};
bool Modify = false;
int64_t C0;
if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
// Might be case 1.
// Signed integer overflow is UB, so make sure C0 + 1 does not overflow.
// (UINT64_MAX is larger, so we don't need to worry about unsigned overflow
// here.)
if (C0 < INT64_MAX)
if (Register RegZ = searchConst(C0 + 1)) {
reverseBranchCondition(Cond);
Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
// We might extend the live range of Z, clear its kill flag to
// account for this.
MRI.clearKillFlags(RegZ);
Modify = true;
}
} else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
// Might be case 2.
// For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
// when C0 is zero.
if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
if (Register RegZ = searchConst(C0 - 1)) {
reverseBranchCondition(Cond);
Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
// We might extend the live range of Z, clear its kill flag to
// account for this.
MRI.clearKillFlags(RegZ);
Modify = true;
}
}
if (!Modify)
return false;
// Build the new branch and remove the old one.
BuildMI(*MBB, MI, MI.getDebugLoc(),
getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
.add(Cond[1])
.add(Cond[2])
.addMBB(TBB);
MI.eraseFromParent();
return true;
}
MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
assert(MI.getDesc().isBranch() && "Unexpected opcode!");
// The branch target is always the last operand.
int NumOp = MI.getNumExplicitOperands();
return MI.getOperand(NumOp - 1).getMBB();
}
bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
int64_t BrOffset) const {
unsigned XLen = STI.getXLen();
// Ideally we could determine the supported branch offset from the
// RISCVII::FormMask, but this can't be used for Pseudo instructions like
// PseudoBR.
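// B-type branches encode a 13-bit signed offset (+/-4 KiB), JAL and
// PseudoBR a 21-bit signed offset (+/-1 MiB), and PseudoJump (AUIPC +
// JALR) reaches +/-2 GiB.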
switch (BranchOp) {
default:
llvm_unreachable("Unexpected opcode!");
case RISCV::BEQ:
case RISCV::BNE:
case RISCV::BLT:
case RISCV::BGE:
case RISCV::BLTU:
case RISCV::BGEU:
case RISCV::CV_BEQIMM:
case RISCV::CV_BNEIMM:
return isIntN(13, BrOffset);
case RISCV::JAL:
case RISCV::PseudoBR:
return isIntN(21, BrOffset);
case RISCV::PseudoJump:
return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
}
}
// If the operation has a predicated pseudo instruction, return the pseudo
// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
// TODO: Support more operations.
unsigned getPredicatedOpcode(unsigned Opcode) {
switch (Opcode) {
case RISCV::ADD: return RISCV::PseudoCCADD;
case RISCV::SUB: return RISCV::PseudoCCSUB;
case RISCV::SLL: return RISCV::PseudoCCSLL;
case RISCV::SRL: return RISCV::PseudoCCSRL;
case RISCV::SRA: return RISCV::PseudoCCSRA;
case RISCV::AND: return RISCV::PseudoCCAND;
case RISCV::OR: return RISCV::PseudoCCOR;
case RISCV::XOR: return RISCV::PseudoCCXOR;
case RISCV::ADDI: return RISCV::PseudoCCADDI;
case RISCV::SLLI: return RISCV::PseudoCCSLLI;
case RISCV::SRLI: return RISCV::PseudoCCSRLI;
case RISCV::SRAI: return RISCV::PseudoCCSRAI;
case RISCV::ANDI: return RISCV::PseudoCCANDI;
case RISCV::ORI: return RISCV::PseudoCCORI;
case RISCV::XORI: return RISCV::PseudoCCXORI;
case RISCV::ADDW: return RISCV::PseudoCCADDW;
case RISCV::SUBW: return RISCV::PseudoCCSUBW;
case RISCV::SLLW: return RISCV::PseudoCCSLLW;
case RISCV::SRLW: return RISCV::PseudoCCSRLW;
case RISCV::SRAW: return RISCV::PseudoCCSRAW;
case RISCV::ADDIW: return RISCV::PseudoCCADDIW;
case RISCV::SLLIW: return RISCV::PseudoCCSLLIW;
case RISCV::SRLIW: return RISCV::PseudoCCSRLIW;
case RISCV::SRAIW: return RISCV::PseudoCCSRAIW;
case RISCV::ANDN: return RISCV::PseudoCCANDN;
case RISCV::ORN: return RISCV::PseudoCCORN;
case RISCV::XNOR: return RISCV::PseudoCCXNOR;
}
return RISCV::INSTRUCTION_LIST_END;
}
/// Identify instructions that can be folded into a CCMOV instruction, and
/// return the defining instruction.
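/// For example, an ADD that feeds the true operand of a PseudoCCMOVGPR can
/// be rewritten as a PseudoCCADD predicated on the same condition.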
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) {
if (!Reg.isVirtual())
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
return nullptr;
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return nullptr;
// Check if MI can be predicated and folded into the CCMOV.
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
return nullptr;
// Don't predicate li idiom.
if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
MI->getOperand(1).getReg() == RISCV::X0)
return nullptr;
// Check if MI has any other defs or physreg uses.
for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
// Reject frame index operands; PEI can't handle the predicated pseudos.
if (MO.isFI() || MO.isCPI() || MO.isJTI())
return nullptr;
if (!MO.isReg())
continue;
// MI can't have any tied operands; that would conflict with predication.
if (MO.isTied())
return nullptr;
if (MO.isDef())
return nullptr;
// Allow constant physregs.
if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
return nullptr;
}
bool DontMoveAcrossStores = true;
if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
return nullptr;
return MI;
}
bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
SmallVectorImpl<MachineOperand> &Cond,
unsigned &TrueOp, unsigned &FalseOp,
bool &Optimizable) const {
assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
"Unknown select instruction");
// CCMOV operands:
// 0: Def.
// 1: LHS of compare.
// 2: RHS of compare.
// 3: Condition code.
// 4: False use.
// 5: True use.
TrueOp = 5;
FalseOp = 4;
Cond.push_back(MI.getOperand(1));
Cond.push_back(MI.getOperand(2));
Cond.push_back(MI.getOperand(3));
// We can only fold when we support short forward branch opt.
Optimizable = STI.hasShortForwardBranchOpt();
return false;
}
MachineInstr *
RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
SmallPtrSetImpl<MachineInstr *> &SeenMIs,
bool PreferFalse) const {
assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
"Unknown select instruction");
if (!STI.hasShortForwardBranchOpt())
return nullptr;
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
MachineInstr *DefMI =
canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
bool Invert = !DefMI;
if (!DefMI)
DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
if (!DefMI)
return nullptr;
// Find new register class to use.
MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
Register DestReg = MI.getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
if (!MRI.constrainRegClass(DestReg, PreviousClass))
return nullptr;
unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");
// Create a new predicated version of DefMI.
MachineInstrBuilder NewMI =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);
// Copy the condition portion.
NewMI.add(MI.getOperand(1));
NewMI.add(MI.getOperand(2));
// Add condition code, inverting if necessary.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
if (Invert)
CC = RISCVCC::getOppositeBranchCondition(CC);
NewMI.addImm(CC);
// Copy the false register.
NewMI.add(FalseReg);
// Copy all the DefMI operands.
const MCInstrDesc &DefDesc = DefMI->getDesc();
for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
NewMI.add(DefMI->getOperand(i));
// Update SeenMIs set: register newly created MI and erase removed DefMI.
SeenMIs.insert(NewMI);
SeenMIs.erase(DefMI);
// If MI is inside a loop, and DefMI is outside the loop, then kill flags on
// DefMI would be invalid when transferred inside the loop. Checking for a
// loop is expensive, but at least remove kill flags if they are in different
// BBs.
if (DefMI->getParent() != MI.getParent())
NewMI->clearKillInfo();
// The caller will erase MI, but not DefMI.
DefMI->eraseFromParent();
return NewMI;
}
unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (MI.isMetaInstruction())
return 0;
unsigned Opcode = MI.getOpcode();
if (Opcode == TargetOpcode::INLINEASM ||
Opcode == TargetOpcode::INLINEASM_BR) {
const MachineFunction &MF = *MI.getParent()->getParent();
return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
*MF.getTarget().getMCAsmInfo());
}
if (!MI.memoperands_empty()) {
MachineMemOperand *MMO = *(MI.memoperands_begin());
if (STI.hasStdExtZihintntl() && MMO->isNonTemporal()) {
if (STI.hasStdExtCOrZca() && STI.enableRVCHintInstrs()) {
if (isCompressibleInst(MI, STI))
return 4; // c.ntl.all + c.load/c.store
return 6; // c.ntl.all + load/store
}
return 8; // ntl.all + load/store
}
}
if (Opcode == TargetOpcode::BUNDLE)
return getInstBundleLength(MI);
if (MI.getParent() && MI.getParent()->getParent()) {
if (isCompressibleInst(MI, STI))
return 2;
}
switch (Opcode) {
case TargetOpcode::STACKMAP:
// The upper bound for a stackmap intrinsic is the full length of its shadow
return StackMapOpers(&MI).getNumPatchBytes();
case TargetOpcode::PATCHPOINT:
// The size of the patchpoint intrinsic is the number of bytes requested
return PatchPointOpers(&MI).getNumPatchBytes();
case TargetOpcode::STATEPOINT: {
// The size of the statepoint intrinsic is the number of bytes requested
unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
// No patch bytes means at most a PseudoCall is emitted
return std::max(NumBytes, 8U);
}
default:
return get(Opcode).getSize();
}
}
unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
unsigned Size = 0;
MachineBasicBlock::const_instr_iterator I = MI.getIterator();
MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
Size += getInstSizeInBytes(*I);
}
return Size;
}
bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
const unsigned Opcode = MI.getOpcode();
switch (Opcode) {
default:
break;
case RISCV::FSGNJ_D:
case RISCV::FSGNJ_S:
case RISCV::FSGNJ_H:
case RISCV::FSGNJ_D_INX:
case RISCV::FSGNJ_D_IN32X:
case RISCV::FSGNJ_S_INX:
case RISCV::FSGNJ_H_INX:
// The canonical floating-point move is fsgnj rd, rs, rs.
return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
case RISCV::ADDI:
case RISCV::ORI:
case RISCV::XORI:
return (MI.getOperand(1).isReg() &&
MI.getOperand(1).getReg() == RISCV::X0) ||
(MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
}
return MI.isAsCheapAsAMove();
}
std::optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
if (MI.isMoveReg())
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
switch (MI.getOpcode()) {
default:
break;
case RISCV::ADDI:
// Operand 1 can be a frameindex but callers expect registers
if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0)
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
break;
case RISCV::FSGNJ_D:
case RISCV::FSGNJ_S:
case RISCV::FSGNJ_H:
case RISCV::FSGNJ_D_INX:
case RISCV::FSGNJ_D_IN32X:
case RISCV::FSGNJ_S_INX:
case RISCV::FSGNJ_H_INX:
// The canonical floating-point move is fsgnj rd, rs, rs.
if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
break;
}
return std::nullopt;
}
MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
// The option is unused. Choose the Local strategy only for in-order cores.
// When the scheduling model is unspecified, use the MinInstrCount strategy,
// as it is more generic.
const auto &SchedModel = STI.getSchedModel();
return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
? MachineTraceStrategy::TS_MinInstrCount
: MachineTraceStrategy::TS_Local;
}
// The strategy was forced by the option.
return ForceMachineCombinerStrategy;
}
void RISCVInstrInfo::finalizeInsInstrs(
MachineInstr &Root, unsigned &Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs) const {
int16_t FrmOpIdx =
RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
if (FrmOpIdx < 0) {
assert(all_of(InsInstrs,
[](MachineInstr *MI) {
return RISCV::getNamedOperandIdx(MI->getOpcode(),
RISCV::OpName::frm) < 0;
}) &&
"New instructions require FRM whereas the old one does not have it");
return;
}
const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
MachineFunction &MF = *Root.getMF();
for (auto *NewMI : InsInstrs) {
// We've already added the FRM operand.
if (static_cast<unsigned>(RISCV::getNamedOperandIdx(
NewMI->getOpcode(), RISCV::OpName::frm)) != NewMI->getNumOperands())
continue;
MachineInstrBuilder MIB(MF, NewMI);
MIB.add(FRM);
if (FRM.getImm() == RISCVFPRndMode::DYN)
MIB.addUse(RISCV::FRM, RegState::Implicit);
}
}
static bool isFADD(unsigned Opc) {
switch (Opc) {
default:
return false;
case RISCV::FADD_H:
case RISCV::FADD_S:
case RISCV::FADD_D:
return true;
}
}
static bool isFSUB(unsigned Opc) {
switch (Opc) {
default:
return false;
case RISCV::FSUB_H:
case RISCV::FSUB_S:
case RISCV::FSUB_D:
return true;
}
}
static bool isFMUL(unsigned Opc) {
switch (Opc) {
default:
return false;
case RISCV::FMUL_H:
case RISCV::FMUL_S:
case RISCV::FMUL_D:
return true;
}
}
bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst,
bool Invert) const {
#define OPCODE_LMUL_CASE(OPC) \
case RISCV::OPC##_M1: \
case RISCV::OPC##_M2: \
case RISCV::OPC##_M4: \
case RISCV::OPC##_M8: \
case RISCV::OPC##_MF2: \
case RISCV::OPC##_MF4: \
case RISCV::OPC##_MF8
#define OPCODE_LMUL_MASK_CASE(OPC) \
case RISCV::OPC##_M1_MASK: \
case RISCV::OPC##_M2_MASK: \
case RISCV::OPC##_M4_MASK: \
case RISCV::OPC##_M8_MASK: \
case RISCV::OPC##_MF2_MASK: \
case RISCV::OPC##_MF4_MASK: \
case RISCV::OPC##_MF8_MASK
unsigned Opcode = Inst.getOpcode();
if (Invert) {
if (auto InvOpcode = getInverseOpcode(Opcode))
Opcode = *InvOpcode;
else
return false;
}
// clang-format off
switch (Opcode) {
default:
return false;
OPCODE_LMUL_CASE(PseudoVADD_VV):
OPCODE_LMUL_MASK_CASE(PseudoVADD_VV):
OPCODE_LMUL_CASE(PseudoVMUL_VV):
OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV):
return true;
}
// clang-format on
#undef OPCODE_LMUL_MASK_CASE
#undef OPCODE_LMUL_CASE
}
bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root,
const MachineInstr &Prev) const {
if (!areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()))
return false;
assert(Root.getMF() == Prev.getMF());
const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo();
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
// Make sure vtype operands are also the same.
const MCInstrDesc &Desc = get(Root.getOpcode());
const uint64_t TSFlags = Desc.TSFlags;
auto checkImmOperand = [&](unsigned OpIdx) {
return Root.getOperand(OpIdx).getImm() == Prev.getOperand(OpIdx).getImm();
};
auto checkRegOperand = [&](unsigned OpIdx) {
return Root.getOperand(OpIdx).getReg() == Prev.getOperand(OpIdx).getReg();
};
// PassThru
// TODO: Potentially we can loosen the condition to consider Root
// reassociable with Prev if Root has NoReg as its passthru, in which case we
// would also need to loosen the condition on the vector policies between
// these.
if (!checkRegOperand(1))
return false;
// SEW
if (RISCVII::hasSEWOp(TSFlags) &&
!checkImmOperand(RISCVII::getSEWOpNum(Desc)))
return false;
// Mask
if (RISCVII::usesMaskPolicy(TSFlags)) {
const MachineBasicBlock *MBB = Root.getParent();
const MachineBasicBlock::const_reverse_iterator It1(&Root);
const MachineBasicBlock::const_reverse_iterator It2(&Prev);
Register MI1VReg;
bool SeenMI2 = false;
for (auto End = MBB->rend(), It = It1; It != End; ++It) {
if (It == It2) {
SeenMI2 = true;
if (!MI1VReg.isValid())
// There is no V0 def between Root and Prev; they're sharing the
// same V0.
break;
}
if (It->modifiesRegister(RISCV::V0, TRI)) {
Register SrcReg = It->getOperand(1).getReg();
// If it's not a virtual register it'll be more difficult to track its
// defs, so bail out here just to be safe.
if (!SrcReg.isVirtual())
return false;
if (!MI1VReg.isValid()) {
// This is the V0 def for Root.
MI1VReg = SrcReg;
continue;
}
// These are unrelated V0 updates between Prev and Root; skip them.
if (!SeenMI2)
continue;
// This is the V0 def for Prev; check if it's the same as that of
// Root.
if (MI1VReg != SrcReg)
return false;
else
break;
}
}
// If we haven't encountered Prev, this function was likely called the
// wrong way around (e.g. Root comes before Prev).
assert(SeenMI2 && "Prev is expected to appear before Root");
}
// Tail / Mask policies
if (RISCVII::hasVecPolicyOp(TSFlags) &&
!checkImmOperand(RISCVII::getVecPolicyOpNum(Desc)))
return false;
// VL
if (RISCVII::hasVLOp(TSFlags)) {
unsigned OpIdx = RISCVII::getVLOpNum(Desc);
const MachineOperand &Op1 = Root.getOperand(OpIdx);
const MachineOperand &Op2 = Prev.getOperand(OpIdx);
if (Op1.getType() != Op2.getType())
return false;
switch (Op1.getType()) {
case MachineOperand::MO_Register:
if (Op1.getReg() != Op2.getReg())
return false;
break;
case MachineOperand::MO_Immediate:
if (Op1.getImm() != Op2.getImm())
return false;
break;
default:
llvm_unreachable("Unrecognized VL operand type");
}
}
// Rounding modes
if (RISCVII::hasRoundModeOp(TSFlags) &&
!checkImmOperand(RISCVII::getVLOpNum(Desc) - 1))
return false;
return true;
}
// Most of our RVV pseudos have a passthru operand, so the real source
// operands start at index 2.
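// For illustration, the operand layout assumed by the index arithmetic below
// (the pseudo definitions are authoritative) is:
//   PseudoVADD_VV_M1 %dst, %passthru, %srcA, %srcB, %avl, sew, policy
// so operands 2 and 3 are the two vector source operands being reassociated.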
bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst,
bool &Commuted) const {
const MachineBasicBlock *MBB = Inst.getParent();
const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) &&
"Expect the present of passthrough operand.");
MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(3).getReg());
// If only one operand has the same or inverse opcode and it's the second
// source operand, the operands must be commuted.
Commuted = !areRVVInstsReassociable(Inst, *MI1) &&
areRVVInstsReassociable(Inst, *MI2);
if (Commuted)
std::swap(MI1, MI2);
return areRVVInstsReassociable(Inst, *MI1) &&
(isVectorAssociativeAndCommutative(*MI1) ||
isVectorAssociativeAndCommutative(*MI1, /* Invert */ true)) &&
hasReassociableOperands(*MI1, MBB) &&
MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
}
bool RISCVInstrInfo::hasReassociableOperands(
const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
if (!isVectorAssociativeAndCommutative(Inst) &&
!isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
const MachineOperand &Op1 = Inst.getOperand(2);
const MachineOperand &Op2 = Inst.getOperand(3);
const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
// We need virtual register definitions for the operands that we will
// reassociate.
MachineInstr *MI1 = nullptr;
MachineInstr *MI2 = nullptr;
if (Op1.isReg() && Op1.getReg().isVirtual())
MI1 = MRI.getUniqueVRegDef(Op1.getReg());
if (Op2.isReg() && Op2.getReg().isVirtual())
MI2 = MRI.getUniqueVRegDef(Op2.getReg());
// And at least one operand must be defined in MBB.
return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
}
void RISCVInstrInfo::getReassociateOperandIndices(
const MachineInstr &Root, unsigned Pattern,
std::array<unsigned, 5> &OperandIndices) const {
TargetInstrInfo::getReassociateOperandIndices(Root, Pattern, OperandIndices);
if (RISCV::getRVVMCOpcode(Root.getOpcode())) {
// Skip the passthrough operand, so increment all indices by one.
for (unsigned I = 0; I < 5; ++I)
++OperandIndices[I];
}
}
bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
bool &Commuted) const {
if (isVectorAssociativeAndCommutative(Inst) ||
isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
return hasReassociableVectorSibling(Inst, Commuted);
if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
return false;
const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
unsigned OperandIdx = Commuted ? 2 : 1;
const MachineInstr &Sibling =
*MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());
int16_t InstFrmOpIdx =
RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
int16_t SiblingFrmOpIdx =
RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);
return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
RISCV::hasEqualFRM(Inst, Sibling);
}
bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
bool Invert) const {
if (isVectorAssociativeAndCommutative(Inst, Invert))
return true;
unsigned Opc = Inst.getOpcode();
if (Invert) {
auto InverseOpcode = getInverseOpcode(Opc);
if (!InverseOpcode)
return false;
Opc = *InverseOpcode;
}
if (isFADD(Opc) || isFMUL(Opc))
return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
Inst.getFlag(MachineInstr::MIFlag::FmNsz);
switch (Opc) {
default:
return false;
case RISCV::ADD:
case RISCV::ADDW:
case RISCV::AND:
case RISCV::OR:
case RISCV::XOR:
// From the RISC-V ISA spec: if both the high and low bits of the same product
// are required, then the recommended code sequence is:
//
// MULH[[S]U] rdh, rs1, rs2
// MUL rdl, rs1, rs2
// (source register specifiers must be in same order and rdh cannot be the
// same as rs1 or rs2)
//
// Microarchitectures can then fuse these into a single multiply operation
// instead of performing two separate multiplies.
// MachineCombiner may reassociate MUL operands and lose the fusion
// opportunity.
case RISCV::MUL:
case RISCV::MULW:
case RISCV::MIN:
case RISCV::MINU:
case RISCV::MAX:
case RISCV::MAXU:
case RISCV::FMIN_H:
case RISCV::FMIN_S:
case RISCV::FMIN_D:
case RISCV::FMAX_H:
case RISCV::FMAX_S:
case RISCV::FMAX_D:
return true;
}
return false;
}
std::optional<unsigned>
RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
#define RVV_OPC_LMUL_CASE(OPC, INV) \
case RISCV::OPC##_M1: \
return RISCV::INV##_M1; \
case RISCV::OPC##_M2: \
return RISCV::INV##_M2; \
case RISCV::OPC##_M4: \
return RISCV::INV##_M4; \
case RISCV::OPC##_M8: \
return RISCV::INV##_M8; \
case RISCV::OPC##_MF2: \
return RISCV::INV##_MF2; \
case RISCV::OPC##_MF4: \
return RISCV::INV##_MF4; \
case RISCV::OPC##_MF8: \
return RISCV::INV##_MF8
#define RVV_OPC_LMUL_MASK_CASE(OPC, INV) \
case RISCV::OPC##_M1_MASK: \
return RISCV::INV##_M1_MASK; \
case RISCV::OPC##_M2_MASK: \
return RISCV::INV##_M2_MASK; \
case RISCV::OPC##_M4_MASK: \
return RISCV::INV##_M4_MASK; \
case RISCV::OPC##_M8_MASK: \
return RISCV::INV##_M8_MASK; \
case RISCV::OPC##_MF2_MASK: \
return RISCV::INV##_MF2_MASK; \
case RISCV::OPC##_MF4_MASK: \
return RISCV::INV##_MF4_MASK; \
case RISCV::OPC##_MF8_MASK: \
return RISCV::INV##_MF8_MASK
switch (Opcode) {
default:
return std::nullopt;
case RISCV::FADD_H:
return RISCV::FSUB_H;
case RISCV::FADD_S:
return RISCV::FSUB_S;
case RISCV::FADD_D:
return RISCV::FSUB_D;
case RISCV::FSUB_H:
return RISCV::FADD_H;
case RISCV::FSUB_S:
return RISCV::FADD_S;
case RISCV::FSUB_D:
return RISCV::FADD_D;
case RISCV::ADD:
return RISCV::SUB;
case RISCV::SUB:
return RISCV::ADD;
case RISCV::ADDW:
return RISCV::SUBW;
case RISCV::SUBW:
return RISCV::ADDW;
// clang-format off
RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV);
RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV);
RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV);
RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV);
// clang-format on
}
#undef RVV_OPC_LMUL_MASK_CASE
#undef RVV_OPC_LMUL_CASE
}
static bool canCombineFPFusedMultiply(const MachineInstr &Root,
const MachineOperand &MO,
bool DoRegPressureReduce) {
if (!MO.isReg() || !MO.getReg().isVirtual())
return false;
const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
MachineInstr *MI = MRI.getVRegDef(MO.getReg());
if (!MI || !isFMUL(MI->getOpcode()))
return false;
if (!Root.getFlag(MachineInstr::MIFlag::FmContract) ||
!MI->getFlag(MachineInstr::MIFlag::FmContract))
return false;
// Try combining even if the fmul has more than one use, as it eliminates the
// dependency between the fadd (or fsub) and the fmul. However, it can extend
// the live ranges of the fmul operands, so reject the transformation in
// register-pressure-reduction mode.
if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
return false;
// Do not combine instructions from different basic blocks.
if (Root.getParent() != MI->getParent())
return false;
return RISCV::hasEqualFRM(Root, *MI);
}
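// Pattern legend for the matching below: FMADD_AX = fadd(fmul(a, b), x),
// FMADD_XA = fadd(x, fmul(a, b)), FMSUB = fsub(fmul(a, b), x) and
// FNMSUB = fsub(x, fmul(a, b)).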
static bool getFPFusedMultiplyPatterns(MachineInstr &Root,
SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) {
unsigned Opc = Root.getOpcode();
bool IsFAdd = isFADD(Opc);
if (!IsFAdd && !isFSUB(Opc))
return false;
bool Added = false;
if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
DoRegPressureReduce)) {
Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX
: RISCVMachineCombinerPattern::FMSUB);
Added = true;
}
if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
DoRegPressureReduce)) {
Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA
: RISCVMachineCombinerPattern::FNMSUB);
Added = true;
}
return Added;
}
static bool getFPPatterns(MachineInstr &Root,
SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) {
return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
}
/// Utility routine that checks if \p MO is defined by a \p CombineOpc
/// instruction in the basic block \p MBB.
static const MachineInstr *canCombine(const MachineBasicBlock &MBB,
const MachineOperand &MO,
unsigned CombineOpc) {
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const MachineInstr *MI = nullptr;
if (MO.isReg() && MO.getReg().isVirtual())
MI = MRI.getUniqueVRegDef(MO.getReg());
// And it needs to be in the trace (otherwise, it won't have a depth).
if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
return nullptr;
// It must only be used by the instruction we combine with.
if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
return nullptr;
return MI;
}
/// Utility routine that checks if \p MO is defined by a SLLI in \p MBB that
/// can be combined by splitting across two SHXADD instructions. The first
/// SHXADD shift amount is given by \p OuterShiftAmt.
static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB,
const MachineOperand &MO,
unsigned OuterShiftAmt) {
const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI);
if (!ShiftMI)
return false;
unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
return false;
return true;
}
// Returns the shift amount from a SHXADD instruction. Returns 0 if the
// instruction is not a SHXADD.
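// (For reference: the Zba SHxADD instructions compute rd = (rs1 << x) + rs2.)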
static unsigned getSHXADDShiftAmount(unsigned Opc) {
switch (Opc) {
default:
return 0;
case RISCV::SH1ADD:
return 1;
case RISCV::SH2ADD:
return 2;
case RISCV::SH3ADD:
return 3;
}
}
// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
// (sh3add (sh2add Y, Z), X).
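// A worked example of the split, for illustration:
//   sh3add Z, (add X, (slli Y, 5)) = (Z << 3) + X + (Y << 5)
//                                  = (((Y << 2) + Z) << 3) + X
//                                  = sh3add (sh2add Y, Z), X
// i.e. the inner shift of 5 splits into the outer 3 plus an inner 2, and a
// remainder of 0-3 maps onto ADD/SH1ADD/SH2ADD/SH3ADD respectively.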
static bool getSHXADDPatterns(const MachineInstr &Root,
SmallVectorImpl<unsigned> &Patterns) {
unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
if (!ShiftAmt)
return false;
const MachineBasicBlock &MBB = *Root.getParent();
const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD);
if (!AddMI)
return false;
bool Found = false;
if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) {
Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
Found = true;
}
if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) {
Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
Found = true;
}
return Found;
}
CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const {
switch (Pattern) {
case RISCVMachineCombinerPattern::FMADD_AX:
case RISCVMachineCombinerPattern::FMADD_XA:
case RISCVMachineCombinerPattern::FMSUB:
case RISCVMachineCombinerPattern::FNMSUB:
return CombinerObjective::MustReduceDepth;
default:
return TargetInstrInfo::getCombinerObjective(Pattern);
}
}
bool RISCVInstrInfo::getMachineCombinerPatterns(
MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
bool DoRegPressureReduce) const {
if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
return true;
if (getSHXADDPatterns(Root, Patterns))
return true;
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
DoRegPressureReduce);
}
static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {
switch (RootOpc) {
default:
llvm_unreachable("Unexpected opcode");
case RISCV::FADD_H:
return RISCV::FMADD_H;
case RISCV::FADD_S:
return RISCV::FMADD_S;
case RISCV::FADD_D:
return RISCV::FMADD_D;
case RISCV::FSUB_H:
return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
: RISCV::FNMSUB_H;
case RISCV::FSUB_S:
return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
: RISCV::FNMSUB_S;
case RISCV::FSUB_D:
return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
: RISCV::FNMSUB_D;
}
}
static unsigned getAddendOperandIdx(unsigned Pattern) {
switch (Pattern) {
default:
llvm_unreachable("Unexpected pattern");
case RISCVMachineCombinerPattern::FMADD_AX:
case RISCVMachineCombinerPattern::FMSUB:
return 2;
case RISCVMachineCombinerPattern::FMADD_XA:
case RISCVMachineCombinerPattern::FNMSUB:
return 1;
}
}
static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs) {
MachineFunction *MF = Root.getMF();
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineOperand &Mul1 = Prev.getOperand(1);
MachineOperand &Mul2 = Prev.getOperand(2);
MachineOperand &Dst = Root.getOperand(0);
MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern));
Register DstReg = Dst.getReg();
unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
DebugLoc MergedLoc =
DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());
bool Mul1IsKill = Mul1.isKill();
bool Mul2IsKill = Mul2.isKill();
bool AddendIsKill = Addend.isKill();
// We need to clear kill flags since we may be extending the live range past
// a kill. If the mul had kill flags, we can preserve those since we know
// where the previous range stopped.
MRI.clearKillFlags(Mul1.getReg());
MRI.clearKillFlags(Mul2.getReg());
MachineInstrBuilder MIB =
BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg)
.addReg(Mul1.getReg(), getKillRegState(Mul1IsKill))
.addReg(Mul2.getReg(), getKillRegState(Mul2IsKill))
.addReg(Addend.getReg(), getKillRegState(AddendIsKill))
.setMIFlags(IntersectedFlags);
InsInstrs.push_back(MIB);
if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg()))
DelInstrs.push_back(&Prev);
DelInstrs.push_back(&Root);
}
// Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
// (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
// shXadd instructions. The outer shXadd keeps its original opcode.
static void
genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
MachineFunction *MF = Root.getMF();
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
unsigned OuterShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
assert(OuterShiftAmt != 0 && "Unexpected opcode");
MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
MachineInstr *ShiftMI =
MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg());
unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");
unsigned InnerOpc;
switch (InnerShiftAmt - OuterShiftAmt) {
default:
llvm_unreachable("Unexpected shift amount");
case 0:
InnerOpc = RISCV::ADD;
break;
case 1:
InnerOpc = RISCV::SH1ADD;
break;
case 2:
InnerOpc = RISCV::SH2ADD;
break;
case 3:
InnerOpc = RISCV::SH3ADD;
break;
}
const MachineOperand &X = AddMI->getOperand(3 - AddOpIdx);
const MachineOperand &Y = ShiftMI->getOperand(1);
const MachineOperand &Z = Root.getOperand(1);
Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass);
auto MIB1 = BuildMI(*MF, MIMetadata(Root), TII->get(InnerOpc), NewVR)
.addReg(Y.getReg(), getKillRegState(Y.isKill()))
.addReg(Z.getReg(), getKillRegState(Z.isKill()));
auto MIB2 = BuildMI(*MF, MIMetadata(Root), TII->get(Root.getOpcode()),
Root.getOperand(0).getReg())
.addReg(NewVR, RegState::Kill)
.addReg(X.getReg(), getKillRegState(X.isKill()));
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
InsInstrs.push_back(MIB1);
InsInstrs.push_back(MIB2);
DelInstrs.push_back(ShiftMI);
DelInstrs.push_back(AddMI);
DelInstrs.push_back(&Root);
}
void RISCVInstrInfo::genAlternativeCodeSequence(
MachineInstr &Root, unsigned Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
switch (Pattern) {
default:
TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
DelInstrs, InstrIdxForVirtReg);
return;
case RISCVMachineCombinerPattern::FMADD_AX:
case RISCVMachineCombinerPattern::FMSUB: {
MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg());
combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
return;
}
case RISCVMachineCombinerPattern::FMADD_XA:
case RISCVMachineCombinerPattern::FNMSUB: {
MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg());
combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
return;
}
case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
return;
case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
return;
}
}
bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const {
MCInstrDesc const &Desc = MI.getDesc();
for (const auto &[Index, Operand] : enumerate(Desc.operands())) {
unsigned OpType = Operand.OperandType;
if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
const MachineOperand &MO = MI.getOperand(Index);
if (MO.isImm()) {
int64_t Imm = MO.getImm();
bool Ok;
switch (OpType) {
default:
llvm_unreachable("Unexpected operand type");
// clang-format off
#define CASE_OPERAND_UIMM(NUM) \
case RISCVOp::OPERAND_UIMM##NUM: \
Ok = isUInt<NUM>(Imm); \
break;
CASE_OPERAND_UIMM(1)
CASE_OPERAND_UIMM(2)
CASE_OPERAND_UIMM(3)
CASE_OPERAND_UIMM(4)
CASE_OPERAND_UIMM(5)
CASE_OPERAND_UIMM(6)
CASE_OPERAND_UIMM(7)
CASE_OPERAND_UIMM(8)
CASE_OPERAND_UIMM(12)
CASE_OPERAND_UIMM(20)
// clang-format on
case RISCVOp::OPERAND_UIMM2_LSB0:
Ok = isShiftedUInt<1, 1>(Imm);
break;
case RISCVOp::OPERAND_UIMM5_LSB0:
Ok = isShiftedUInt<4, 1>(Imm);
break;
case RISCVOp::OPERAND_UIMM6_LSB0:
Ok = isShiftedUInt<5, 1>(Imm);
break;
case RISCVOp::OPERAND_UIMM7_LSB00:
Ok = isShiftedUInt<5, 2>(Imm);
break;
case RISCVOp::OPERAND_UIMM8_LSB00:
Ok = isShiftedUInt<6, 2>(Imm);
break;
case RISCVOp::OPERAND_UIMM8_LSB000:
Ok = isShiftedUInt<5, 3>(Imm);
break;
case RISCVOp::OPERAND_UIMM8_GE32:
Ok = isUInt<8>(Imm) && Imm >= 32;
break;
case RISCVOp::OPERAND_UIMM9_LSB000:
Ok = isShiftedUInt<6, 3>(Imm);
break;
case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
break;
case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:
Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0);
break;
case RISCVOp::OPERAND_ZERO:
Ok = Imm == 0;
break;
case RISCVOp::OPERAND_SIMM5:
Ok = isInt<5>(Imm);
break;
case RISCVOp::OPERAND_SIMM5_PLUS1:
Ok = (isInt<5>(Imm) && Imm != -16) || Imm == 16;
break;
case RISCVOp::OPERAND_SIMM6:
Ok = isInt<6>(Imm);
break;
case RISCVOp::OPERAND_SIMM6_NONZERO:
Ok = Imm != 0 && isInt<6>(Imm);
break;
case RISCVOp::OPERAND_VTYPEI10:
Ok = isUInt<10>(Imm);
break;
case RISCVOp::OPERAND_VTYPEI11:
Ok = isUInt<11>(Imm);
break;
case RISCVOp::OPERAND_SIMM12:
Ok = isInt<12>(Imm);
break;
case RISCVOp::OPERAND_SIMM12_LSB00000:
Ok = isShiftedInt<7, 5>(Imm);
break;
case RISCVOp::OPERAND_UIMMLOG2XLEN:
Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
break;
case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
Ok = Ok && Imm != 0;
break;
case RISCVOp::OPERAND_CLUI_IMM:
Ok = (isUInt<5>(Imm) && Imm != 0) ||
(Imm >= 0xfffe0 && Imm <= 0xfffff);
break;
case RISCVOp::OPERAND_RVKRNUM:
Ok = Imm >= 0 && Imm <= 10;
break;
case RISCVOp::OPERAND_RVKRNUM_0_7:
Ok = Imm >= 0 && Imm <= 7;
break;
case RISCVOp::OPERAND_RVKRNUM_1_10:
Ok = Imm >= 1 && Imm <= 10;
break;
case RISCVOp::OPERAND_RVKRNUM_2_14:
Ok = Imm >= 2 && Imm <= 14;
break;
case RISCVOp::OPERAND_SPIMM:
Ok = (Imm & 0xf) == 0;
break;
}
if (!Ok) {
ErrInfo = "Invalid immediate";
return false;
}
}
}
}
const uint64_t TSFlags = Desc.TSFlags;
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
if (!Op.isImm() && !Op.isReg()) {
ErrInfo = "Invalid operand type for VL operand";
return false;
}
if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
auto *RC = MRI.getRegClass(Op.getReg());
if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
ErrInfo = "Invalid register class for VL operand";
return false;
}
}
if (!RISCVII::hasSEWOp(TSFlags)) {
ErrInfo = "VL operand w/o SEW operand?";
return false;
}
}
if (RISCVII::hasSEWOp(TSFlags)) {
unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
if (!MI.getOperand(OpIdx).isImm()) {
ErrInfo = "SEW value expected to be an immediate";
return false;
}
uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();
if (Log2SEW > 31) {
ErrInfo = "Unexpected SEW value";
return false;
}
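// The operand encodes log2(SEW): 3 -> e8, 4 -> e16, 5 -> e32, 6 -> e64; a
// value of 0 (conventionally used for mask operations) is treated as SEW 8.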
unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
if (!RISCVVType::isValidSEW(SEW)) {
ErrInfo = "Unexpected SEW value";
return false;
}
}
if (RISCVII::hasVecPolicyOp(TSFlags)) {
unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
if (!MI.getOperand(OpIdx).isImm()) {
ErrInfo = "Policy operand expected to be an immediate";
return false;
}
uint64_t Policy = MI.getOperand(OpIdx).getImm();
if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
ErrInfo = "Invalid Policy Value";
return false;
}
if (!RISCVII::hasVLOp(TSFlags)) {
ErrInfo = "policy operand w/o VL operand?";
return false;
}
// VecPolicy operands can only exist on instructions with passthru/merge
// arguments. Note that not all instructions with a passthru have a vec
// policy operand; some instructions have implicit policies.
unsigned UseOpIdx;
if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
ErrInfo = "policy operand w/o tied operand?";
return false;
}
}
return true;
}
bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
const MachineInstr &AddrI,
ExtAddrMode &AM) const {
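// For illustration (not from the original source), the fold targeted here is
//   %base = ADDI %x, 16
//   %val  = LW   %base, 4
// becoming %val = LW %x, 20, provided the combined displacement still fits
// in the signed 12-bit immediate field.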
switch (MemI.getOpcode()) {
default:
return false;
case RISCV::LB:
case RISCV::LBU:
case RISCV::LH:
case RISCV::LHU:
case RISCV::LW:
case RISCV::LWU:
case RISCV::LD:
case RISCV::FLH:
case RISCV::FLW:
case RISCV::FLD:
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
case RISCV::SD:
case RISCV::FSH:
case RISCV::FSW:
case RISCV::FSD:
break;
}
if (MemI.getOperand(0).getReg() == Reg)
return false;
if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
!AddrI.getOperand(2).isImm())
return false;
int64_t OldOffset = MemI.getOperand(2).getImm();
int64_t Disp = AddrI.getOperand(2).getImm();
int64_t NewOffset = OldOffset + Disp;
if (!STI.is64Bit())
NewOffset = SignExtend64<32>(NewOffset);
if (!isInt<12>(NewOffset))
return false;
AM.BaseReg = AddrI.getOperand(1).getReg();
AM.ScaledReg = 0;
AM.Scale = 0;
AM.Displacement = NewOffset;
AM.Form = ExtAddrMode::Formula::Basic;
return true;
}
MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
const ExtAddrMode &AM) const {
const DebugLoc &DL = MemI.getDebugLoc();
MachineBasicBlock &MBB = *MemI.getParent();
assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
"Addressing mode not supported for folding");
return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))
.addReg(MemI.getOperand(0).getReg(),
MemI.mayLoad() ? RegState::Define : 0)
.addReg(AM.BaseReg)
.addImm(AM.Displacement)
.setMemRefs(MemI.memoperands())
.setMIFlags(MemI.getFlags());
}
bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
// Conservatively, only handle scalar loads/stores for now.
switch (LdSt.getOpcode()) {
case RISCV::LB:
case RISCV::LBU:
case RISCV::SB:
case RISCV::LH:
case RISCV::LHU:
case RISCV::FLH:
case RISCV::SH:
case RISCV::FSH:
case RISCV::LW:
case RISCV::LWU:
case RISCV::FLW:
case RISCV::SW:
case RISCV::FSW:
case RISCV::LD:
case RISCV::FLD:
case RISCV::SD:
case RISCV::FSD:
break;
default:
return false;
}
const MachineOperand *BaseOp;
OffsetIsScalable = false;
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
return false;
BaseOps.push_back(BaseOp);
return true;
}
// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
// helper?
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
ArrayRef<const MachineOperand *> BaseOps1,
const MachineInstr &MI2,
ArrayRef<const MachineOperand *> BaseOps2) {
// Only examine the first "base" operand of each instruction, on the
// assumption that it represents the real base address of the memory access.
// Other operands are typically offsets or indices from this base address.
if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
return true;
if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
return false;
auto MO1 = *MI1.memoperands_begin();
auto MO2 = *MI2.memoperands_begin();
if (MO1->getAddrSpace() != MO2->getAddrSpace())
return false;
auto Base1 = MO1->getValue();
auto Base2 = MO2->getValue();
if (!Base1 || !Base2)
return false;
Base1 = getUnderlyingObject(Base1);
Base2 = getUnderlyingObject(Base2);
if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
return false;
return Base1 == Base2;
}
bool RISCVInstrInfo::shouldClusterMemOps(
ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
unsigned NumBytes) const {
// If the mem ops (to be clustered) do not have the same base ptr, then they
// should not be clustered.
if (!BaseOps1.empty() && !BaseOps2.empty()) {
const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
return false;
} else if (!BaseOps1.empty() || !BaseOps2.empty()) {
// If only one base op is empty, they do not have the same base ptr
return false;
}
unsigned CacheLineSize =
BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
// Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
CacheLineSize = CacheLineSize ? CacheLineSize : 64;
// Cluster if the memory operations are on the same or a neighbouring cache
// line, but limit the maximum ClusterSize to avoid creating too much
// additional register pressure.
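// For example, with the default 64-byte line, accesses at offsets 0 and 60
// may be clustered (|0 - 60| < 64), while offsets 0 and 128 never are.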
return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize;
}
// Set BaseReg (the base register operand), Offset (the byte offset being
// accessed) and the access Width of the passed instruction that reads/writes
// memory. Returns false if the instruction does not read/write memory or if
// the BaseReg/Offset/Width can't be determined. It is not guaranteed to
// recognise base operands and offsets in all cases.
// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
// function) and set it as appropriate.
bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
LocationSize &Width, const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
// Here we assume the standard RISC-V ISA, which uses a base+offset
// addressing mode. You'll need to relax these conditions to support custom
// load/store instructions.
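// For illustration: LW %dst, %base, 8 (i.e. lw dst, 8(base)) yields
// BaseReg = %base, Offset = 8 and, via the memory operand, Width = 4 bytes.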
if (LdSt.getNumExplicitOperands() != 3)
return false;
if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
!LdSt.getOperand(2).isImm())
return false;
if (!LdSt.hasOneMemOperand())
return false;
Width = (*LdSt.memoperands_begin())->getSize();
BaseReg = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm();
return true;
}
bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
const MachineInstr &MIa, const MachineInstr &MIb) const {
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
// Retrieve the base register, the offset from the base register, and the
// width. Width is the size of memory that is being loaded/stored (e.g. 1, 2,
// 4). If the base registers are identical, and the offset of the lower memory
// access plus its width does not reach the offset of the higher memory
// access, then the memory accesses are disjoint.
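// For example, with identical bases, a 4-byte load at offset 0 and any
// access at offset 4 or above are disjoint, since 0 + 4 <= 4.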
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
LocationSize WidthA = 0, WidthB = 0;
if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
if (BaseOpA->isIdenticalTo(*BaseOpB)) {
int LowOffset = std::min(OffsetA, OffsetB);
int HighOffset = std::max(OffsetA, OffsetB);
LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowWidth.hasValue() &&
LowOffset + (int)LowWidth.getValue() <= HighOffset)
return true;
}
}
return false;
}
std::pair<unsigned, unsigned>
RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
return std::make_pair(TF & Mask, TF & ~Mask);
}
ArrayRef<std::pair<unsigned, const char *>>
RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace RISCVII;
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_CALL, "riscv-call"},
{MO_LO, "riscv-lo"},
{MO_HI, "riscv-hi"},
{MO_PCREL_LO, "riscv-pcrel-lo"},
{MO_PCREL_HI, "riscv-pcrel-hi"},
{MO_GOT_HI, "riscv-got-hi"},
{MO_TPREL_LO, "riscv-tprel-lo"},
{MO_TPREL_HI, "riscv-tprel-hi"},
{MO_TPREL_ADD, "riscv-tprel-add"},
{MO_TLS_GOT_HI, "riscv-tls-got-hi"},
{MO_TLS_GD_HI, "riscv-tls-gd-hi"},
{MO_TLSDESC_HI, "riscv-tlsdesc-hi"},
{MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},
{MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},
{MO_TLSDESC_CALL, "riscv-tlsdesc-call"}};
return ArrayRef(TargetFlags);
}
bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
const Function &F = MF.getFunction();
// Can F be deduplicated by the linker? If it can, don't outline from it.
if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
return false;
// Don't outline from functions with section markings; the program could
// expect that all the code is in the named section.
if (F.hasSection())
return false;
// It's safe to outline from MF.
return true;
}
bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const {
// More accurate safety checking is done in getOutliningCandidateInfo.
return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
}
// Enum values indicating how an outlined call should be constructed.
enum MachineOutlinerConstructionID {
MachineOutlinerDefault
};
bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
MachineFunction &MF) const {
return MF.getFunction().hasMinSize();
}
std::optional<outliner::OutlinedFunction>
RISCVInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
// First we need to filter out candidates where the X5 register (i.e. t0)
// can't be used to set up the function call.
auto CannotInsertCall = [](outliner::Candidate &C) {
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
};
llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
return std::nullopt;
unsigned SequenceSize = 0;
for (auto &MI : RepeatedSequenceLocs[0])
SequenceSize += getInstSizeInBytes(MI);
// call t0, function = 8 bytes.
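// (The call is materialized as auipc t0, %pcrel_hi(fn) followed by
// jalr t0, %pcrel_lo(fn)(t0), i.e. two 4-byte instructions.)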
unsigned CallOverhead = 8;
for (auto &C : RepeatedSequenceLocs)
C.setCallInfo(MachineOutlinerDefault, CallOverhead);
// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
unsigned FrameOverhead = 4;
if (RepeatedSequenceLocs[0]
.getMF()
->getSubtarget<RISCVSubtarget>()
.hasStdExtCOrZca())
FrameOverhead = 2;
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
FrameOverhead, MachineOutlinerDefault);
}
outliner::InstrType
RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
unsigned Flags) const {
MachineInstr &MI = *MBBI;
MachineBasicBlock *MBB = MI.getParent();
const TargetRegisterInfo *TRI =
MBB->getParent()->getSubtarget().getRegisterInfo();
const auto &F = MI.getMF()->getFunction();
// We can manually strip out CFI instructions later.
if (MI.isCFIInstruction())
// If the current function has exception handling code, we can't outline
// and strip these CFI instructions, since that may break the .eh_frame
// section needed for unwinding.
return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
: outliner::InstrType::Invisible;
// We need support for tail calls to outlined functions before return
// statements can be allowed.
if (MI.isReturn())
return outliner::InstrType::Illegal;
// Don't allow modifying the X5 register, which we use for return addresses
// in these outlined functions.
if (MI.modifiesRegister(RISCV::X5, TRI) ||
MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
return outliner::InstrType::Illegal;
// Make sure the operands don't reference something unsafe.
for (const auto &MO : MI.operands()) {
// pcrel-hi and pcrel-lo can't be put in separate sections, so filter that
// out wherever possible.
if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
(MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
- F.hasSection()))
+ F.hasSection() || F.getSectionPrefix()))
return outliner::InstrType::Illegal;
}
return outliner::InstrType::Legal;
}
void RISCVInstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
// Strip out any CFI instructions
bool Changed = true;
while (Changed) {
Changed = false;
auto I = MBB.begin();
auto E = MBB.end();
for (; I != E; ++I) {
if (I->isCFIInstruction()) {
I->removeFromParent();
Changed = true;
break;
}
}
}
MBB.addLiveIn(RISCV::X5);
// Add in a return instruction to the end of the outlined frame.
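// (JALR with rd = X0 and rs1 = X5 is `jr t0`, returning to the address that
// the `call t0` at each call site left in X5.)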
MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
.addReg(RISCV::X0, RegState::Define)
.addReg(RISCV::X5)
.addImm(0));
}
MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
MachineFunction &MF, outliner::Candidate &C) const {
// Add in a call instruction to the outlined function at the given location.
It = MBB.insert(It,
BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
.addGlobalAddress(M.getNamedValue(MF.getName()), 0,
RISCVII::MO_CALL));
return It;
}
std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
Register Reg) const {
// TODO: Handle cases where Reg is a super- or sub-register of the
// destination register.
const MachineOperand &Op0 = MI.getOperand(0);
if (!Op0.isReg() || Reg != Op0.getReg())
return std::nullopt;
// Don't consider ADDIW as a candidate because the caller may not be aware
// of its sign extension behaviour.
if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
MI.getOperand(2).isImm())
return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()};
return std::nullopt;
}
// MIR printer helper function to annotate Operands with a comment.
std::string RISCVInstrInfo::createMIROperandComment(
const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
const TargetRegisterInfo *TRI) const {
// Print a generic comment for this operand if there is one.
std::string GenericComment =
TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
if (!GenericComment.empty())
return GenericComment;
// If not, we must have an immediate operand.
if (!Op.isImm())
return std::string();
std::string Comment;
raw_string_ostream OS(Comment);
uint64_t TSFlags = MI.getDesc().TSFlags;
// Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
// operand of vector codegen pseudos.
if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI ||
MI.getOpcode() == RISCV::PseudoVSETVLI ||
MI.getOpcode() == RISCV::PseudoVSETIVLI ||
MI.getOpcode() == RISCV::PseudoVSETVLIX0) &&
OpIdx == 2) {
unsigned Imm = MI.getOperand(OpIdx).getImm();
RISCVVType::printVType(Imm, OS);
} else if (RISCVII::hasSEWOp(TSFlags) &&
OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) {
unsigned Log2SEW = MI.getOperand(OpIdx).getImm();
unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
OS << "e" << SEW;
} else if (RISCVII::hasVecPolicyOp(TSFlags) &&
OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) {
unsigned Policy = MI.getOperand(OpIdx).getImm();
assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
"Invalid Policy Value");
OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
<< (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
}
OS.flush();
return Comment;
}
// clang-format off
#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
RISCV::Pseudo##OP##_##LMUL
#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
RISCV::Pseudo##OP##_##LMUL##_MASK
#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
#define CASE_RVV_OPCODE_UNMASK(OP) \
CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
#define CASE_RVV_OPCODE_MASK(OP) \
CASE_RVV_OPCODE_MASK_WIDEN(OP): \
case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
#define CASE_RVV_OPCODE_WIDEN(OP) \
CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
case CASE_RVV_OPCODE_MASK_WIDEN(OP)
#define CASE_RVV_OPCODE(OP) \
CASE_RVV_OPCODE_UNMASK(OP): \
case CASE_RVV_OPCODE_MASK(OP)
// clang-format on
// clang-format off
#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL
#define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \
CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)
#define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \
CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE)
#define CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \
CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE)
#define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE)
// VFMA instructions are SEW specific.
#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW
#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)
#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)
#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)
#define CASE_VFMA_OPCODE_VV(OP) \
CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
#define CASE_VFMA_SPLATS(OP) \
CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
// clang-format on
bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
const MCInstrDesc &Desc = MI.getDesc();
if (!Desc.isCommutable())
return false;
switch (MI.getOpcode()) {
case RISCV::TH_MVEQZ:
case RISCV::TH_MVNEZ:
// We can't commute operands if operand 2 (i.e., rs1 in
// mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
// not valid as the in/out-operand 1).
if (MI.getOperand(2).getReg() == RISCV::X0)
return false;
// Operands 1 and 2 are commutable if we also switch the opcode.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
case RISCV::TH_MULA:
case RISCV::TH_MULAW:
case RISCV::TH_MULAH:
case RISCV::TH_MULS:
case RISCV::TH_MULSW:
case RISCV::TH_MULSH:
// Operands 2 and 3 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
case RISCV::PseudoCCMOVGPRNoX0:
case RISCV::PseudoCCMOVGPR:
// Operands 4 and 5 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
case CASE_RVV_OPCODE(VADD_VV):
case CASE_RVV_OPCODE(VAND_VV):
case CASE_RVV_OPCODE(VOR_VV):
case CASE_RVV_OPCODE(VXOR_VV):
case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
case CASE_RVV_OPCODE_MASK(VMSNE_VV):
case CASE_RVV_OPCODE(VMIN_VV):
case CASE_RVV_OPCODE(VMINU_VV):
case CASE_RVV_OPCODE(VMAX_VV):
case CASE_RVV_OPCODE(VMAXU_VV):
case CASE_RVV_OPCODE(VMUL_VV):
case CASE_RVV_OPCODE(VMULH_VV):
case CASE_RVV_OPCODE(VMULHU_VV):
case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
case CASE_RVV_OPCODE(VSADD_VV):
case CASE_RVV_OPCODE(VSADDU_VV):
case CASE_RVV_OPCODE(VAADD_VV):
case CASE_RVV_OPCODE(VAADDU_VV):
case CASE_RVV_OPCODE(VSMUL_VV):
// Operands 2 and 3 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FMACC):
case CASE_VFMA_SPLATS(FMSAC):
case CASE_VFMA_SPLATS(FNMADD):
case CASE_VFMA_SPLATS(FNMSUB):
case CASE_VFMA_SPLATS(FNMACC):
case CASE_VFMA_SPLATS(FNMSAC):
case CASE_VFMA_OPCODE_VV(FMACC):
case CASE_VFMA_OPCODE_VV(FMSAC):
case CASE_VFMA_OPCODE_VV(FNMACC):
case CASE_VFMA_OPCODE_VV(FNMSAC):
case CASE_VMA_OPCODE_LMULS(MADD, VX):
case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
case CASE_VMA_OPCODE_LMULS(MACC, VX):
case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
case CASE_VMA_OPCODE_LMULS(MACC, VV):
case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
// If the tail policy is undisturbed we can't commute.
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
return false;
// For these instructions we can only swap operand 1 and operand 3 by
// changing the opcode.
unsigned CommutableOpIdx1 = 1;
unsigned CommutableOpIdx2 = 3;
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
CommutableOpIdx2))
return false;
return true;
}
case CASE_VFMA_OPCODE_VV(FMADD):
case CASE_VFMA_OPCODE_VV(FMSUB):
case CASE_VFMA_OPCODE_VV(FNMADD):
case CASE_VFMA_OPCODE_VV(FNMSUB):
case CASE_VMA_OPCODE_LMULS(MADD, VV):
case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
// If the tail policy is undisturbed we can't commute.
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
return false;
// For these instructions we have more freedom. We can commute with the
// other multiplicand or with the addend/subtrahend/minuend.
// Any fixed operand must be from source 1, 2 or 3.
if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
return false;
if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
return false;
// If both ops are fixed, one must be the tied source.
if (SrcOpIdx1 != CommuteAnyOperandIndex &&
SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
return false;
// Look for two different register operands assumed to be commutable
// regardless of the FMA opcode. The FMA opcode is adjusted later if
// needed.
if (SrcOpIdx1 == CommuteAnyOperandIndex ||
SrcOpIdx2 == CommuteAnyOperandIndex) {
// At least one of the operands to be commuted is not specified and
// this method is free to choose appropriate commutable operands.
unsigned CommutableOpIdx1 = SrcOpIdx1;
if (SrcOpIdx1 == SrcOpIdx2) {
// Neither operand is fixed. Set one of the commutable
// operands to the tied source.
CommutableOpIdx1 = 1;
} else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
// Only one of the operands is not fixed.
CommutableOpIdx1 = SrcOpIdx2;
}
// CommutableOpIdx1 is well defined now. Let's choose another commutable
// operand and assign its index to CommutableOpIdx2.
unsigned CommutableOpIdx2;
if (CommutableOpIdx1 != 1) {
// If we haven't already used the tied source, we must use it now.
CommutableOpIdx2 = 1;
} else {
Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
// The commuted operands should have different registers.
// Otherwise, the commute transformation does not change anything and
// is useless. We use this as a hint to make our decision.
if (Op1Reg != MI.getOperand(2).getReg())
CommutableOpIdx2 = 2;
else
CommutableOpIdx2 = 3;
}
// Assign the found pair of commutable indices to SrcOpIdx1 and
// SrcOpIdx2 to return those values.
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
CommutableOpIdx2))
return false;
}
return true;
}
}
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
// clang-format off
#define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
break;
#define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \
CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
#define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \
CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
#define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \
CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
#define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
#define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
// VFMA depends on SEW.
#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
break;
#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)
#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)
#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const {
auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
if (NewMI)
return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
return MI;
};
switch (MI.getOpcode()) {
case RISCV::TH_MVEQZ:
case RISCV::TH_MVNEZ: {
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
: RISCV::TH_MVEQZ));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
OpIdx2);
}
case RISCV::PseudoCCMOVGPRNoX0:
case RISCV::PseudoCCMOVGPR: {
// CCMOV can be commuted by inverting the condition.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
CC = RISCVCC::getOppositeBranchCondition(CC);
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(3).setImm(CC);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
OpIdx1, OpIdx2);
}
case CASE_VFMA_SPLATS(FMACC):
case CASE_VFMA_SPLATS(FMADD):
case CASE_VFMA_SPLATS(FMSAC):
case CASE_VFMA_SPLATS(FMSUB):
case CASE_VFMA_SPLATS(FNMACC):
case CASE_VFMA_SPLATS(FNMADD):
case CASE_VFMA_SPLATS(FNMSAC):
case CASE_VFMA_SPLATS(FNMSUB):
case CASE_VFMA_OPCODE_VV(FMACC):
case CASE_VFMA_OPCODE_VV(FMSAC):
case CASE_VFMA_OPCODE_VV(FNMACC):
case CASE_VFMA_OPCODE_VV(FNMSAC):
case CASE_VMA_OPCODE_LMULS(MADD, VX):
case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
case CASE_VMA_OPCODE_LMULS(MACC, VX):
case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
case CASE_VMA_OPCODE_LMULS(MACC, VV):
case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
// It only makes sense to toggle these between clobbering the
// addend/subtrahend/minuend and clobbering one of the multiplicands.
assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
unsigned Opc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB)
CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD)
CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB)
CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
}
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
case CASE_VFMA_OPCODE_VV(FMADD):
case CASE_VFMA_OPCODE_VV(FMSUB):
case CASE_VFMA_OPCODE_VV(FNMADD):
case CASE_VFMA_OPCODE_VV(FNMSUB):
case CASE_VMA_OPCODE_LMULS(MADD, VV):
case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
// If one of the operands is the addend, we need to change the opcode.
// Otherwise we're just swapping two of the multiplicands.
if (OpIdx1 == 3 || OpIdx2 == 3) {
unsigned Opc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC)
CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC)
CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC)
CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
}
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.setDesc(get(Opc));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
// Let the default code handle it.
break;
}
}
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
#undef CASE_RVV_OPCODE_UNMASK_LMUL
#undef CASE_RVV_OPCODE_MASK_LMUL
#undef CASE_RVV_OPCODE_LMUL
#undef CASE_RVV_OPCODE_UNMASK_WIDEN
#undef CASE_RVV_OPCODE_UNMASK
#undef CASE_RVV_OPCODE_MASK_WIDEN
#undef CASE_RVV_OPCODE_MASK
#undef CASE_RVV_OPCODE_WIDEN
#undef CASE_RVV_OPCODE
#undef CASE_VMA_OPCODE_COMMON
#undef CASE_VMA_OPCODE_LMULS_M1
#undef CASE_VMA_OPCODE_LMULS_MF2
#undef CASE_VMA_OPCODE_LMULS_MF4
#undef CASE_VMA_OPCODE_LMULS
#undef CASE_VFMA_OPCODE_COMMON
#undef CASE_VFMA_OPCODE_LMULS_M1
#undef CASE_VFMA_OPCODE_LMULS_MF2
#undef CASE_VFMA_OPCODE_LMULS_MF4
#undef CASE_VFMA_OPCODE_VV
#undef CASE_VFMA_SPLATS
// clang-format off
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
RISCV::PseudoV##OP##_##LMUL##_TIED
#define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \
CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
#define CASE_WIDEOP_OPCODE_LMULS(OP) \
CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)
#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
case RISCV::PseudoV##OP##_##LMUL##_TIED: \
NewOpc = RISCV::PseudoV##OP##_##LMUL; \
break;
#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
// FP widening ops may be SEW aware. Create SEW-aware cases for them.
#define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \
RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED
#define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP) \
CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \
case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \
case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \
case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \
case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32)
#define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \
case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \
NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \
break;
#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32)
#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
// clang-format on
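// Illustrative expansion of the macros above: CASE_WIDEOP_OPCODE_LMULS(WADD_WV)
// yields the case-label list
//   RISCV::PseudoVWADD_WV_MF8_TIED: case RISCV::PseudoVWADD_WV_MF4_TIED:
//   ... case RISCV::PseudoVWADD_WV_M4_TIED
// and CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV) maps each _TIED pseudo back to
// its untied counterpart (e.g. PseudoVWADD_WV_MF8_TIED -> PseudoVWADD_WV_MF8).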
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
MachineInstrBuilder MIB;
switch (MI.getOpcode()) {
default:
return nullptr;
case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
MI.getNumExplicitOperands() == 7 &&
"Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
// If the tail policy is undisturbed, we can't convert.
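// (Bit 0 of the policy immediate is the tail policy; 1 means tail agnostic,
// 0 means tail undisturbed.)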
if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
1) == 0)
return nullptr;
// clang-format off
unsigned NewOpc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
}
// clang-format on
MachineBasicBlock &MBB = *MI.getParent();
MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(MI.getOperand(0))
.addReg(MI.getOperand(0).getReg(), RegState::Undef)
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
.add(MI.getOperand(4))
.add(MI.getOperand(5))
.add(MI.getOperand(6));
break;
}
case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
// If the tail policy is undisturbed, we can't convert.
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
MI.getNumExplicitOperands() == 6);
if ((MI.getOperand(5).getImm() & 1) == 0)
return nullptr;
// clang-format off
unsigned NewOpc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
}
// clang-format on
MachineBasicBlock &MBB = *MI.getParent();
MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(MI.getOperand(0))
.addReg(MI.getOperand(0).getReg(), RegState::Undef)
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
.add(MI.getOperand(4))
.add(MI.getOperand(5));
break;
}
}
MIB.copyImplicitOps(MI);
if (LV) {
unsigned NumOps = MI.getNumOperands();
for (unsigned I = 1; I < NumOps; ++I) {
MachineOperand &Op = MI.getOperand(I);
if (Op.isReg() && Op.isKill())
LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
}
}
if (LIS) {
SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
if (MI.getOperand(0).isEarlyClobber()) {
// The use operand (operand 1) was tied to the early-clobber def (operand 0),
// so its live interval could have ended at an early-clobber slot. Now that
// they are no longer tied, we need to update it to the normal register slot.
LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
LiveRange::Segment *S = LI.getSegmentContaining(Idx);
if (S->end == Idx.getRegSlot(true))
S->end = Idx.getRegSlot();
}
}
return MIB;
}
#undef CASE_WIDEOP_OPCODE_COMMON
#undef CASE_WIDEOP_OPCODE_LMULS_MF4
#undef CASE_WIDEOP_OPCODE_LMULS
#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4
#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
#undef CASE_FP_WIDEOP_OPCODE_COMMON
#undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4
#undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4
#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator II, const DebugLoc &DL,
Register DestReg, uint32_t Amount,
MachineInstr::MIFlag Flag) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
if (llvm::has_single_bit<uint32_t>(Amount)) {
uint32_t ShiftAmount = Log2_32(Amount);
if (ShiftAmount == 0)
return;
BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
.addReg(DestReg, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
} else if (STI.hasStdExtZba() &&
((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) ||
(Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) ||
(Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) {
// We can use Zba SHXADD+SLLI instructions for multiply in some cases.
unsigned Opc;
uint32_t ShiftAmount;
if (Amount % 9 == 0) {
Opc = RISCV::SH3ADD;
ShiftAmount = Log2_64(Amount / 9);
} else if (Amount % 5 == 0) {
Opc = RISCV::SH2ADD;
ShiftAmount = Log2_64(Amount / 5);
} else if (Amount % 3 == 0) {
Opc = RISCV::SH1ADD;
ShiftAmount = Log2_64(Amount / 3);
} else {
llvm_unreachable("implied by if-clause");
}
if (ShiftAmount)
BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
.addReg(DestReg, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
BuildMI(MBB, II, DL, get(Opc), DestReg)
.addReg(DestReg, RegState::Kill)
.addReg(DestReg)
.setMIFlag(Flag);
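// Illustrative sequence for Amount == 72 == 9 * 8:
//   slli   dest, dest, 3       (dest *= 8)
//   sh3add dest, dest, dest    (dest = (dest << 3) + dest, i.e. dest * 9)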
} else if (llvm::has_single_bit<uint32_t>(Amount - 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(Amount - 1);
BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
.addReg(DestReg)
.addImm(ShiftAmount)
.setMIFlag(Flag);
BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
.addReg(ScaledRegister, RegState::Kill)
.addReg(DestReg, RegState::Kill)
.setMIFlag(Flag);
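// Illustrative sequence for Amount == 5:
//   slli tmp, dest, 2          (tmp = dest * 4)
//   add  dest, tmp, dest       (dest = dest * 5)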
} else if (llvm::has_single_bit<uint32_t>(Amount + 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(Amount + 1);
BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
.addReg(DestReg)
.addImm(ShiftAmount)
.setMIFlag(Flag);
BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
.addReg(ScaledRegister, RegState::Kill)
.addReg(DestReg, RegState::Kill)
.setMIFlag(Flag);
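// Illustrative sequence for Amount == 7:
//   slli tmp, dest, 3          (tmp = dest * 8)
//   sub  dest, tmp, dest       (dest = dest * 7)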
} else if (STI.hasStdExtZmmul()) {
Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
movImm(MBB, II, DL, N, Amount, Flag);
BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
.addReg(DestReg, RegState::Kill)
.addReg(N, RegState::Kill)
.setMIFlag(Flag);
} else {
Register Acc;
uint32_t PrevShiftAmount = 0;
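// Illustrative decomposition for Amount == 11 (0b1011), x = original dest:
//   acc = dest                 (bit 0)
//   dest <<= 1; acc += dest    (bit 1, acc == 3 * x)
//   dest <<= 2                 (bit 3, dest == 8 * x)
//   dest += acc                (8x + 3x == 11x)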
for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
if (Amount & (1U << ShiftAmount)) {
if (ShiftAmount)
BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
.addReg(DestReg, RegState::Kill)
.addImm(ShiftAmount - PrevShiftAmount)
.setMIFlag(Flag);
if (Amount >> (ShiftAmount + 1)) {
// If we don't have an accumulator yet, create it and copy DestReg.
if (!Acc) {
Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc)
.addReg(DestReg)
.setMIFlag(Flag);
} else {
BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
.addReg(Acc, RegState::Kill)
.addReg(DestReg)
.setMIFlag(Flag);
}
}
PrevShiftAmount = ShiftAmount;
}
}
assert(Acc && "Expected valid accumulator");
BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
.addReg(DestReg, RegState::Kill)
.addReg(Acc, RegState::Kill)
.setMIFlag(Flag);
}
}
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
{{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
{MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
return ArrayRef(TargetFlags);
}
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
bool RISCV::isSEXT_W(const MachineInstr &MI) {
return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
}
// Returns true if this is the zext.w pattern, add.uw rd, rs1, x0.
bool RISCV::isZEXT_W(const MachineInstr &MI) {
return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
}
// Returns true if this is the zext.b pattern, andi rd, rs1, 255.
bool RISCV::isZEXT_B(const MachineInstr &MI) {
return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
}
static bool isRVVWholeLoadStore(unsigned Opcode) {
switch (Opcode) {
default:
return false;
case RISCV::VS1R_V:
case RISCV::VS2R_V:
case RISCV::VS4R_V:
case RISCV::VS8R_V:
case RISCV::VL1RE8_V:
case RISCV::VL2RE8_V:
case RISCV::VL4RE8_V:
case RISCV::VL8RE8_V:
case RISCV::VL1RE16_V:
case RISCV::VL2RE16_V:
case RISCV::VL4RE16_V:
case RISCV::VL8RE16_V:
case RISCV::VL1RE32_V:
case RISCV::VL2RE32_V:
case RISCV::VL4RE32_V:
case RISCV::VL8RE32_V:
case RISCV::VL1RE64_V:
case RISCV::VL2RE64_V:
case RISCV::VL4RE64_V:
case RISCV::VL8RE64_V:
return true;
}
}
bool RISCV::isRVVSpill(const MachineInstr &MI) {
// RVV lacks any support for immediate addressing for stack addresses, so be
// conservative.
unsigned Opcode = MI.getOpcode();
if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
!isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
return false;
return true;
}
std::optional<std::pair<unsigned, unsigned>>
RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
switch (Opcode) {
default:
return std::nullopt;
case RISCV::PseudoVSPILL2_M1:
case RISCV::PseudoVRELOAD2_M1:
return std::make_pair(2u, 1u);
case RISCV::PseudoVSPILL2_M2:
case RISCV::PseudoVRELOAD2_M2:
return std::make_pair(2u, 2u);
case RISCV::PseudoVSPILL2_M4:
case RISCV::PseudoVRELOAD2_M4:
return std::make_pair(2u, 4u);
case RISCV::PseudoVSPILL3_M1:
case RISCV::PseudoVRELOAD3_M1:
return std::make_pair(3u, 1u);
case RISCV::PseudoVSPILL3_M2:
case RISCV::PseudoVRELOAD3_M2:
return std::make_pair(3u, 2u);
case RISCV::PseudoVSPILL4_M1:
case RISCV::PseudoVRELOAD4_M1:
return std::make_pair(4u, 1u);
case RISCV::PseudoVSPILL4_M2:
case RISCV::PseudoVRELOAD4_M2:
return std::make_pair(4u, 2u);
case RISCV::PseudoVSPILL5_M1:
case RISCV::PseudoVRELOAD5_M1:
return std::make_pair(5u, 1u);
case RISCV::PseudoVSPILL6_M1:
case RISCV::PseudoVRELOAD6_M1:
return std::make_pair(6u, 1u);
case RISCV::PseudoVSPILL7_M1:
case RISCV::PseudoVRELOAD7_M1:
return std::make_pair(7u, 1u);
case RISCV::PseudoVSPILL8_M1:
case RISCV::PseudoVRELOAD8_M1:
return std::make_pair(8u, 1u);
}
}
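// Fault-only-first loads (e.g. vle8ff.v) write the trimmed VL in addition to
// the destination register; the two-explicit-defs check below captures that.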
bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
return MI.getNumExplicitDefs() == 2 &&
MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) && !MI.isInlineAsm();
}
bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
int16_t MI1FrmOpIdx =
RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
int16_t MI2FrmOpIdx =
RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
return false;
MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx);
MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
return FrmOp1.getImm() == FrmOp2.getImm();
}
std::optional<unsigned>
RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) {
// TODO: Handle Zvbb instructions
switch (Opcode) {
default:
return std::nullopt;
// 11.6. Vector Single-Width Shift Instructions
case RISCV::VSLL_VX:
case RISCV::VSRL_VX:
case RISCV::VSRA_VX:
// 12.4. Vector Single-Width Scaling Shift Instructions
case RISCV::VSSRL_VX:
case RISCV::VSSRA_VX:
// Only the low lg2(SEW) bits of the shift-amount value are used.
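// For example, with SEW == 32 (Log2SEW == 5) only shift-amount bits [4:0]
// are demanded.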
return Log2SEW;
// 11.7 Vector Narrowing Integer Right Shift Instructions
case RISCV::VNSRL_WX:
case RISCV::VNSRA_WX:
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
case RISCV::VNCLIPU_WX:
case RISCV::VNCLIP_WX:
// Only the low lg2(2*SEW) bits of the shift-amount value are used.
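// For example, when narrowing from 2*SEW == 64 to SEW == 32, shift-amount
// bits [5:0] are demanded.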
return Log2SEW + 1;
// 11.1. Vector Single-Width Integer Add and Subtract
case RISCV::VADD_VX:
case RISCV::VSUB_VX:
case RISCV::VRSUB_VX:
// 11.2. Vector Widening Integer Add/Subtract
case RISCV::VWADDU_VX:
case RISCV::VWSUBU_VX:
case RISCV::VWADD_VX:
case RISCV::VWSUB_VX:
case RISCV::VWADDU_WX:
case RISCV::VWSUBU_WX:
case RISCV::VWADD_WX:
case RISCV::VWSUB_WX:
// 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
case RISCV::VADC_VXM:
case RISCV::VADC_VIM:
case RISCV::VMADC_VXM:
case RISCV::VMADC_VIM:
case RISCV::VMADC_VX:
case RISCV::VSBC_VXM:
case RISCV::VMSBC_VXM:
case RISCV::VMSBC_VX:
// 11.5 Vector Bitwise Logical Instructions
case RISCV::VAND_VX:
case RISCV::VOR_VX:
case RISCV::VXOR_VX:
// 11.8. Vector Integer Compare Instructions
case RISCV::VMSEQ_VX:
case RISCV::VMSNE_VX:
case RISCV::VMSLTU_VX:
case RISCV::VMSLT_VX:
case RISCV::VMSLEU_VX:
case RISCV::VMSLE_VX:
case RISCV::VMSGTU_VX:
case RISCV::VMSGT_VX:
// 11.9. Vector Integer Min/Max Instructions
case RISCV::VMINU_VX:
case RISCV::VMIN_VX:
case RISCV::VMAXU_VX:
case RISCV::VMAX_VX:
// 11.10. Vector Single-Width Integer Multiply Instructions
case RISCV::VMUL_VX:
case RISCV::VMULH_VX:
case RISCV::VMULHU_VX:
case RISCV::VMULHSU_VX:
// 11.11. Vector Integer Divide Instructions
case RISCV::VDIVU_VX:
case RISCV::VDIV_VX:
case RISCV::VREMU_VX:
case RISCV::VREM_VX:
// 11.12. Vector Widening Integer Multiply Instructions
case RISCV::VWMUL_VX:
case RISCV::VWMULU_VX:
case RISCV::VWMULSU_VX:
// 11.13. Vector Single-Width Integer Multiply-Add Instructions
case RISCV::VMACC_VX:
case RISCV::VNMSAC_VX:
case RISCV::VMADD_VX:
case RISCV::VNMSUB_VX:
// 11.14. Vector Widening Integer Multiply-Add Instructions
case RISCV::VWMACCU_VX:
case RISCV::VWMACC_VX:
case RISCV::VWMACCSU_VX:
case RISCV::VWMACCUS_VX:
// 11.15. Vector Integer Merge Instructions
case RISCV::VMERGE_VXM:
// 11.16. Vector Integer Move Instructions
case RISCV::VMV_V_X:
// 12.1. Vector Single-Width Saturating Add and Subtract
case RISCV::VSADDU_VX:
case RISCV::VSADD_VX:
case RISCV::VSSUBU_VX:
case RISCV::VSSUB_VX:
// 12.2. Vector Single-Width Averaging Add and Subtract
case RISCV::VAADDU_VX:
case RISCV::VAADD_VX:
case RISCV::VASUBU_VX:
case RISCV::VASUB_VX:
// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
case RISCV::VSMUL_VX:
// 16.1. Integer Scalar Move Instructions
case RISCV::VMV_S_X:
return 1U << Log2SEW;
}
}
unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
const RISCVVPseudosTable::PseudoInfo *RVV =
RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
if (!RVV)
return 0;
return RVV->BaseInstr;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
index 9dafd5e628ca..e82e624f7099 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td
@@ -1,1995 +1,2010 @@
//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
// disregarding specific ABI / programming model
def Is64Bit : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
"64-bit mode (x86_64)">;
def Is32Bit : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
"32-bit mode (80386)">;
def Is16Bit : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
"16-bit mode (i8086)">;
//===----------------------------------------------------------------------===//
// X86 Subtarget ISA features
//===----------------------------------------------------------------------===//
def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
"Enable X87 float instructions">;
def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
"Enable NOPL instruction (generally pentium pro+)">;
def FeatureCMOV : SubtargetFeature<"cmov","HasCMOV", "true",
"Enable conditional move instructions">;
def FeatureCX8 : SubtargetFeature<"cx8", "HasCX8", "true",
"Support CMPXCHG8B instructions">;
def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true",
"Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
"Support fxsave/fxrestore instructions">;
def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
"Support xsave instructions">;
def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
"Support xsaveopt instructions",
[FeatureXSAVE]>;
def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
"Support xsavec instructions",
[FeatureXSAVE]>;
def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
"Support xsaves instructions",
[FeatureXSAVE]>;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
"Enable SSE3 instructions",
[FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
"Enable SSSE3 instructions",
[FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
"Enable SSE 4.1 instructions",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<"mmx","HasMMX", "true",
"Enable MMX instructions">;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode. Nothing should imply this feature bit. It
// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64 : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions">;
def FeatureCX16 : SubtargetFeature<"cx16", "HasCX16", "true",
"64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
[FeatureCX8]>;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
"Enable AVX instructions",
[FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
"Enable AVX2 instructions",
[FeatureAVX]>;
def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
"Enable three-operand fused multiple-add",
[FeatureAVX]>;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
"Support 16-bit floating point conversion instructions",
[FeatureAVX]>;
def FeatureEVEX512 : SubtargetFeature<"evex512", "HasEVEX512", "true",
"Support ZMM and 64-bit mask instructions">;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
"Enable AVX-512 instructions",
[FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
"true", "Enable AVX-512 Population Count Instructions",
[FeatureAVX512]>;
def FeaturePREFETCHI : SubtargetFeature<"prefetchi", "HasPREFETCHI",
"true",
"Prefetch instruction with T0 or T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
"Enable AVX-512 Byte and Word Instructions",
[FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
"Enable AVX-512 Vector Length eXtensions",
[FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
"Enable AVX-512 Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
"Enable AVX-512 further Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureAVXIFMA : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
"Enable AVX-IFMA",
[FeatureAVX2]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
"Enable AVX-512 Integer Fused Multiple-Add",
[FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
"Enable protection keys">;
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
"Enable AVX-512 Vector Neural Network Instructions",
[FeatureAVX512]>;
def FeatureAVXVNNI : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
"Support AVX_VNNI encoding",
[FeatureAVX2]>;
def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
"Support bfloat16 floating point",
[FeatureBWI]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
"Enable AVX-512 Bit Algorithms",
[FeatureBWI]>;
def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
"HasVP2INTERSECT", "true",
"Enable AVX-512 vp2intersect",
[FeatureAVX512]>;
// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
// guarded under the condition hasVLX. So we imply it in FeatureFP16 currently.
// FIXME: FP16 conversions between f16 and i64 customize the type v8i64, which
// is supposed to be guarded under the condition hasDQI. So we imply it in
// FeatureFP16 currently.
def FeatureFP16 : SubtargetFeature<"avx512fp16", "HasFP16", "true",
"Support 16-bit floating point",
[FeatureBWI, FeatureVLX, FeatureDQI]>;
def FeatureAVXVNNIINT8 : SubtargetFeature<"avxvnniint8",
"HasAVXVNNIINT8", "true",
"Enable AVX-VNNI-INT8",
[FeatureAVX2]>;
def FeatureAVXVNNIINT16 : SubtargetFeature<"avxvnniint16",
"HasAVXVNNIINT16", "true",
"Enable AVX-VNNI-INT16",
[FeatureAVX2]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
"Enable Galois Field Arithmetic Instructions",
[FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
"Enable vpclmulqdq instructions",
[FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add",
[FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
[FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
"HasSSEUnalignedMem", "true",
"Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
"Promote selected AES instructions to AVX512/AVX registers",
[FeatureAVX2, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
"Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
"Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
"Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
"Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
"Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
"Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
"Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
def FeatureSHA512 : SubtargetFeature<"sha512", "HasSHA512", "true",
"Support SHA512 instructions",
[FeatureAVX2]>;
// Processor supports CET SHSTK - Control-Flow Enforcement Technology
// using Shadow Stack
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
"Support CET Shadow-Stack instructions">;
def FeatureSM3 : SubtargetFeature<"sm3", "HasSM3", "true",
"Support SM3 instructions",
[FeatureAVX]>;
def FeatureSM4 : SubtargetFeature<"sm4", "HasSM4", "true",
"Support SM4 instructions",
[FeatureAVX2]>;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
def FeatureLAHFSAHF64 : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
"Support LAHF and SAHF instructions in 64-bit mode">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
"Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
"Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
"Enable Cache Line Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
"Support ptwrite instruction">;
def FeatureAMXTILE : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
"Support AMX-TILE instructions">;
def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
"Support AMX-INT8 instructions",
[FeatureAMXTILE]>;
def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
"Support AMX-BF16 instructions",
[FeatureAMXTILE]>;
def FeatureAMXFP16 : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
"Support AMX amx-fp16 instructions",
[FeatureAMXTILE]>;
def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
"Support AMX-COMPLEX instructions",
[FeatureAMXTILE]>;
def FeatureCMPCCXADD : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
"Support CMPCCXADD instructions">;
def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
"Support RAO-INT instructions",
[]>;
def FeatureAVXNECONVERT : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
"Support AVX-NE-CONVERT instructions",
[FeatureAVX2]>;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
"Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
"Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
"Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
"Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
"Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
"Support RDPID instructions">;
def FeatureRDPRU : SubtargetFeature<"rdpru", "HasRDPRU", "true",
"Support RDPRU instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
"Has ENQCMD instructions">;
def FeatureKL : SubtargetFeature<"kl", "HasKL", "true",
"Support Key Locker kl Instructions",
[FeatureSSE2]>;
def FeatureWIDEKL : SubtargetFeature<"widekl", "HasWIDEKL", "true",
"Support Key Locker wide Instructions",
[FeatureKL]>;
def FeatureHRESET : SubtargetFeature<"hreset", "HasHRESET", "true",
"Has hreset instruction">;
def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
"Has serialize instruction">;
def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
"Support TSXLDTRK instructions">;
def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
"Has UINTR Instructions">;
def FeatureUSERMSR : SubtargetFeature<"usermsr", "HasUSERMSR", "true",
"Support USERMSR instructions">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
"platform configuration instruction">;
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
"Support movdir64b instruction (direct store 64 bytes)">;
def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
"Support AVX10.1 up to 256-bit instruction",
[FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>;
def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
"Support AVX10.1 up to 512-bit instruction",
[FeatureAVX10_1, FeatureEVEX512]>;
def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true",
"Support extended general purpose register">;
def FeaturePush2Pop2 : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
"Support PUSH2/POP2 instructions">;
def FeaturePPX : SubtargetFeature<"ppx", "HasPPX", "true",
"Support Push-Pop Acceleration">;
def FeatureNDD : SubtargetFeature<"ndd", "HasNDD", "true",
"Support non-destructive destination">;
def FeatureCCMP : SubtargetFeature<"ccmp", "HasCCMP", "true",
"Support conditional cmp & test instructions">;
def FeatureNF : SubtargetFeature<"nf", "HasNF", "true",
"Support status flags update suppression">;
def FeatureCF : SubtargetFeature<"cf", "HasCF", "true",
"Support conditional faulting">;
def FeatureZU : SubtargetFeature<"zu", "HasZU", "true",
"Support zero-upper SETcc/IMUL">;
def FeatureUseGPR32InInlineAsm
: SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
"Enable use of GPR32 in inline assembly for APX">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB
: SubtargetFeature<
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
// Icelake and newer processors have Fast Short REP MOV.
def FeatureFSRM
: SubtargetFeature<
"fsrm", "HasFSRM", "true",
"REP MOVSB of short lengths is faster">;
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features">;
//===----------------------------------------------------------------------===//
// X86 Subtarget Security Mitigation features
//===----------------------------------------------------------------------===//
// Lower indirect calls using a special construct called a `retpoline` to
// mitigate potential Spectre v2 attacks against them.
def FeatureRetpolineIndirectCalls
: SubtargetFeature<
"retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
"Remove speculation of indirect calls from the generated code">;
// Lower indirect branches and switches either using conditional branch trees
// or using a special construct called a `retpoline` to mitigate potential
// Spectre v2 attacks against them.
def FeatureRetpolineIndirectBranches
: SubtargetFeature<
"retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
"Remove speculation of indirect branches from the generated code">;
// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` above.
def FeatureRetpoline
: SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
"Remove speculation of indirect branches from the "
"generated code, either by avoiding them entirely or "
"lowering them with a speculation blocking construct",
[FeatureRetpolineIndirectCalls,
FeatureRetpolineIndirectBranches]>;
// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk
: SubtargetFeature<
"retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
"When lowering an indirect call or branch using a `retpoline`, rely "
"on the specified user provided thunk rather than emitting one "
"ourselves. Only has effect when combined with some other retpoline "
"feature", [FeatureRetpolineIndirectCalls]>;
// Mitigate LVI attacks against indirect calls/branches and call returns
def FeatureLVIControlFlowIntegrity
: SubtargetFeature<
"lvi-cfi", "UseLVIControlFlowIntegrity", "true",
"Prevent indirect calls/branches from using a memory operand, and "
"precede all indirect calls/branches from a register with an "
"LFENCE instruction to serialize control flow. Also decompose RET "
"instructions into a POP+LFENCE+JMP sequence.">;
// Enable SESES to mitigate speculative execution attacks
def FeatureSpeculativeExecutionSideEffectSuppression
: SubtargetFeature<
"seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
"Prevent speculative execution side channel timing attacks by "
"inserting a speculation barrier before memory reads, memory writes, "
"and conditional branches. Implies LVI Control Flow integrity.",
[FeatureLVIControlFlowIntegrity]>;
// Mitigate LVI attacks against data loads
def FeatureLVILoadHardening
: SubtargetFeature<
"lvi-load-hardening", "UseLVILoadHardening", "true",
"Insert LFENCE instructions to prevent data speculatively injected "
"into loads from being used maliciously.">;
def FeatureTaggedGlobals
: SubtargetFeature<
"tagged-globals", "AllowTaggedGlobals", "true",
"Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits.">;
// Control codegen mitigation against Straight Line Speculation vulnerability.
def FeatureHardenSlsRet
: SubtargetFeature<
"harden-sls-ret", "HardenSlsRet", "true",
"Harden against straight line speculation across RET instructions.">;
def FeatureHardenSlsIJmp
: SubtargetFeature<
"harden-sls-ijmp", "HardenSlsIJmp", "true",
"Harden against straight line speculation across indirect JMP instructions.">;
//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//
def TuningPreferMovmskOverVTest : SubtargetFeature<"prefer-movmsk-over-vtest",
"PreferMovmskOverVTest", "true",
"Prefer movmsk over vtest instruction">;
def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
"PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;
def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
"true",
"PMADDWD is slower than PMULLD">;
// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUnalignedMem16Slow", "true",
"Slow unaligned 16-byte memory access">;
def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
"IsUnalignedMem32Slow", "true",
"Slow unaligned 32-byte memory access">;
def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;
// True if 8-bit divisions are significantly faster than
// 32-bit divisions and should be used when possible.
def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
"HasSlowDivide32", "true",
"Use 8-bit divide for positive values less than 256">;
// True if 32-bit divisions are significantly faster than
// 64-bit divisions and should be used when possible.
def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
"HasSlowDivide64", "true",
"Use 32-bit divide for positive values less than 2^32">;
def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions (to prevent a stall when returning too early)">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
"SlowTwoMemOps", "true",
"Two memory operand instructions are slow">;
// True if the LEA instruction inputs have to be ready at address generation
// (AG) time.
def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
// True if the LEA instruction has all three source operands: base, index,
// and offset, or if the LEA instruction uses base and index registers where
// the base is EBP, RBP, or R13
def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
"LEA instruction with 3 ops or certain registers is slow">;
// True if INC and DEC instructions are slow when writing to flags
def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
"HasPOPCNTFalseDeps", "true",
"POPCNT has a false dependency on dest register">;
def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
"HasMULCFalseDeps", "true",
"VF[C]MULCPH/SH has a false dependency on dest register">;
def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
"HasPERMFalseDeps", "true",
"VPERMD/Q/PS/PD has a false dependency on dest register">;
def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
"HasRANGEFalseDeps", "true",
"VRANGEPD/PS/SD/SS has a false dependency on dest register">;
def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
"HasGETMANTFalseDeps", "true",
"VGETMANTSS/SD/SH and VGETMANDPS/PD(memory version) has a"
" false dependency on dest register">;
def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
"HasMULLQFalseDeps", "true",
"VPMULLQ has a false dependency on dest register">;
def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
"HasSBBDepBreaking", "true",
"SBB with same register has no source dependency">;
// On recent X86 (port-bound) processors, it's preferable to combine into a single shuffle
// using a variable mask over multiple fixed shuffles.
def TuningFastVariableCrossLaneShuffle
: SubtargetFeature<"fast-variable-crosslane-shuffle",
"HasFastVariableCrossLaneShuffle",
"true", "Cross-lane shuffles with variable masks are fast">;
def TuningFastVariablePerLaneShuffle
: SubtargetFeature<"fast-variable-perlane-shuffle",
"HasFastVariablePerLaneShuffle",
"true", "Per-lane shuffles with variable masks are fast">;
// Goldmont / Tremont (atom in general) has no bypass delay
def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
"NoDomainDelay","true",
"Has no bypass delay when using the 'wrong' domain">;
// Many processors (Nehalem+ on Intel) have no bypass delay when
// using the wrong mov type.
def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
"NoDomainDelayMov","true",
"Has no bypass delay when using the 'wrong' mov type">;
// Newer processors (Skylake+ on Intel) have no bypass delay when
// using the wrong blend type.
def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
"NoDomainDelayBlend","true",
"Has no bypass delay when using the 'wrong' blend type">;
// Newer processors (Haswell+ on Intel) have no bypass delay when
// using the wrong shuffle type.
def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
"NoDomainDelayShuffle","true",
"Has no bypass delay when using the 'wrong' shuffle type">;
// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
// imm shifts/rotate if they can use more ports than regular shuffles.
def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
"PreferLowerShuffleAsShift", "true",
"Shifts are faster (or as fast) as shuffle">;
def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
"FastImmVectorShift", "true",
"Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def TuningInsertVZEROUPPER
: SubtargetFeature<"vzeroupper",
"InsertVZEROUPPER",
"true", "Should insert vzeroupper instructions">;
// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
// True if hardware SQRTSS instruction is at least as fast (latency) as
// RSQRTSS followed by a Newton-Raphson iteration.
def TuningFastScalarFSQRT
: SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
"true", "Scalar SQRT is fast (disable Newton-Raphson)">;
// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
def TuningFastVectorFSQRT
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def TuningFastLZCNT
: SubtargetFeature<
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
// If the target can efficiently decode NOPs up to 7 bytes in length.
def TuningFast7ByteNOP
: SubtargetFeature<
"fast-7bytenop", "HasFast7ByteNOP", "true",
"Target can quickly decode up to 7 byte NOPs">;
// If the target can efficiently decode NOPs up to 11 bytes in length.
def TuningFast11ByteNOP
: SubtargetFeature<
"fast-11bytenop", "HasFast11ByteNOP", "true",
"Target can quickly decode up to 11 byte NOPs">;
// If the target can efficiently decode NOPs up to 15 bytes in length.
def TuningFast15ByteNOP
: SubtargetFeature<
"fast-15bytenop", "HasFast15ByteNOP", "true",
"Target can quickly decode up to 15 byte NOPs">;
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
def TuningFastSHLDRotate
: SubtargetFeature<
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;
// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
def TuningBranchFusion
: SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
"CMP/TEST can be fused with conditional branches">;
// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def TuningMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;
// Gather has been available since Haswell (the AVX2 feature set), so
// technically we can generate gathers on all AVX2 processors. But the
// overhead on Haswell is high. The Skylake Client processor has faster
// gathers than Haswell, and performance is similar to Skylake Server
// (AVX-512).
def TuningFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
// Generate vpdpwssd instead of vpmaddwd+vpaddd sequence.
def TuningFastDPWSSD
: SubtargetFeature<
"fast-dpwssd", "HasFastDPWSSD", "true",
"Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence">;
def TuningPreferNoGather
: SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
"Prefer no gather instructions">;
def TuningPreferNoScatter
: SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
"Prefer no scatter instructions">;
def TuningPrefer128Bit
: SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
"Prefer 128-bit AVX instructions">;
def TuningPrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
def TuningAllowLight256Bit
: SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
"Enable generation of 256-bit load/stores even if we prefer 128-bit">;
def TuningPreferMaskRegisters
: SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
"Prefer AVX512 mask registers over PTEST/MOVMSK">;
def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
"Indicates that the BEXTR instruction is implemented as a single uop "
"with good throughput">;
// Combine vector math operations with shuffles into horizontal math
// instructions if a CPU implements horizontal operations (introduced with
// SSE3) with better latency/throughput than the alternative sequence.
def TuningFastHorizontalOps
: SubtargetFeature<
"fast-hops", "HasFastHorizontalOps", "true",
"Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
"normal vector instructions with shuffles">;
def TuningFastScalarShiftMasks
: SubtargetFeature<
"fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
"Prefer a left/right scalar logical shift pair over a shift+and pair">;
def TuningFastVectorShiftMasks
: SubtargetFeature<
"fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
"Prefer a left/right vector logical shift pair over a shift+and pair">;
def TuningFastMOVBE
: SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
"Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
def TuningFastImm16
: SubtargetFeature<"fast-imm16", "HasFastImm16", "true",
"Prefer a i16 instruction with i16 immediate over extension to i32">;
def TuningUseSLMArithCosts
: SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
"Use Silvermont specific arithmetic costs">;
def TuningUseGLMDivSqrtCosts
: SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
"Use Goldmont specific floating point div/sqrt costs">;
// Starting with the Redwood Cove architecture, branches support a
// branch-taken hint (i.e., instruction prefix 3EH).
def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
"Target has branch hint feature">;
//===----------------------------------------------------------------------===//
// X86 CPU Families
// TODO: Remove these - use general tuning features to determine codegen.
//===----------------------------------------------------------------------===//
// Bonnell
def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
include "X86RegisterInfo.td"
include "X86RegisterBanks.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
include "X86Schedule.td"
include "X86InstrInfo.td"
include "X86SchedPredicates.td"
def X86InstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
// X86 Scheduler Models
//===----------------------------------------------------------------------===//
include "X86ScheduleAtom.td"
include "X86SchedSandyBridge.td"
include "X86SchedHaswell.td"
include "X86SchedBroadwell.td"
include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
include "X86ScheduleZnver2.td"
include "X86ScheduleZnver3.td"
include "X86ScheduleZnver4.td"
include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
include "X86SchedIceLake.td"
include "X86SchedAlderlakeP.td"
include "X86SchedSapphireRapids.td"
//===----------------------------------------------------------------------===//
// X86 Processor Feature Lists
//===----------------------------------------------------------------------===//
def ProcessorFeatures {
// x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
list<SubtargetFeature> X86_64V1Features = [
FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureX86_64,
];
list<SubtargetFeature> X86_64V1Tuning = [
TuningMacroFusion,
TuningSlow3OpsLEA,
TuningSlowDivide64,
TuningSlowIncDec,
TuningInsertVZEROUPPER
];
list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
FeatureSSE42
]);
list<SubtargetFeature> X86_64V2Tuning = [
TuningMacroFusion,
TuningSlow3OpsLEA,
TuningSlowDivide64,
TuningSlowUAMem32,
TuningFastScalarFSQRT,
TuningFastSHLDRotate,
TuningFast15ByteNOP,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER
];
list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
FeatureMOVBE, FeatureXSAVE
]);
list<SubtargetFeature> X86_64V3Tuning = [
TuningMacroFusion,
TuningSlow3OpsLEA,
TuningSlowDivide64,
TuningFastScalarFSQRT,
TuningFastSHLDRotate,
TuningFast15ByteNOP,
TuningFastVariableCrossLaneShuffle,
TuningFastVariablePerLaneShuffle,
TuningPOPCNTFalseDeps,
TuningLZCNTFalseDeps,
TuningInsertVZEROUPPER,
TuningAllowLight256Bit
];
list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
FeatureEVEX512,
FeatureBWI,
FeatureCDI,
FeatureDQI,
FeatureVLX,
]);
list<SubtargetFeature> X86_64V4Tuning = [
TuningMacroFusion,
TuningSlow3OpsLEA,
TuningSlowDivide64,
TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
TuningFastSHLDRotate,
TuningFast15ByteNOP,
TuningFastVariableCrossLaneShuffle,
TuningFastVariablePerLaneShuffle,
TuningPrefer256Bit,
TuningFastGather,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER,
TuningAllowLight256Bit
];
// Nehalem
list<SubtargetFeature> NHMFeatures = X86_64V2Features;
list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
TuningSlowDivide64,
TuningInsertVZEROUPPER,
TuningNoDomainDelayMov];
// Westmere
list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
list<SubtargetFeature> WSMTuning = NHMTuning;
list<SubtargetFeature> WSMFeatures =
!listconcat(NHMFeatures, WSMAdditionalFeatures);
// Sandybridge
list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
FeatureXSAVE,
FeatureXSAVEOPT];
list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
TuningSlow3OpsLEA,
TuningSlowDivide64,
TuningSlowUAMem32,
TuningFastScalarFSQRT,
TuningFastSHLDRotate,
TuningFast15ByteNOP,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER,
TuningNoDomainDelayMov];
list<SubtargetFeature> SNBFeatures =
!listconcat(WSMFeatures, SNBAdditionalFeatures);
// Ivybridge
list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase];
list<SubtargetFeature> IVBTuning = SNBTuning;
list<SubtargetFeature> IVBFeatures =
!listconcat(SNBFeatures, IVBAdditionalFeatures);
// Haswell
list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
FeatureBMI,
FeatureBMI2,
FeatureERMSB,
FeatureFMA,
FeatureINVPCID,
FeatureLZCNT,
FeatureMOVBE];
list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
TuningSlow3OpsLEA,
TuningSlowDivide64,
TuningFastScalarFSQRT,
TuningFastSHLDRotate,
TuningFast15ByteNOP,
TuningFastVariableCrossLaneShuffle,
TuningFastVariablePerLaneShuffle,
TuningPOPCNTFalseDeps,
TuningLZCNTFalseDeps,
TuningInsertVZEROUPPER,
TuningAllowLight256Bit,
TuningNoDomainDelayMov,
TuningNoDomainDelayShuffle];
list<SubtargetFeature> HSWFeatures =
!listconcat(IVBFeatures, HSWAdditionalFeatures);
// Broadwell
list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
FeatureRDSEED,
FeaturePRFCHW];
list<SubtargetFeature> BDWTuning = HSWTuning;
list<SubtargetFeature> BDWFeatures =
!listconcat(HSWFeatures, BDWAdditionalFeatures);
// Skylake
list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT];
list<SubtargetFeature> SKLTuning = [TuningFastGather,
TuningMacroFusion,
TuningSlow3OpsLEA,
TuningSlowDivide64,
TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
TuningFastSHLDRotate,
TuningFast15ByteNOP,
TuningFastVariableCrossLaneShuffle,
TuningFastVariablePerLaneShuffle,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER,
TuningAllowLight256Bit,
TuningNoDomainDelayMov,
TuningNoDomainDelayShuffle,
TuningNoDomainDelayBlend];
list<SubtargetFeature> SKLFeatures =
!listconcat(BDWFeatures, SKLAdditionalFeatures);
// Skylake-AVX512
list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT,
FeatureAVX512,
FeatureEVEX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
FeatureVLX,
FeaturePKU,
FeatureCLWB];
list<SubtargetFeature> SKXTuning = [TuningFastGather,
TuningMacroFusion,
TuningSlow3OpsLEA,
TuningSlowDivide64,
TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
TuningFastSHLDRotate,
TuningFast15ByteNOP,
TuningFastVariableCrossLaneShuffle,
TuningFastVariablePerLaneShuffle,
TuningPrefer256Bit,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER,
TuningAllowLight256Bit,
TuningPreferShiftShuffle,
TuningNoDomainDelayMov,
TuningNoDomainDelayShuffle,
TuningNoDomainDelayBlend,
TuningFastImmVectorShift];
list<SubtargetFeature> SKXFeatures =
!listconcat(BDWFeatures, SKXAdditionalFeatures);
// Cascadelake
list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
list<SubtargetFeature> CLXTuning = SKXTuning;
list<SubtargetFeature> CLXFeatures =
!listconcat(SKXFeatures, CLXAdditionalFeatures);
// Cooperlake
list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
list<SubtargetFeature> CPXTuning = SKXTuning;
list<SubtargetFeature> CPXFeatures =
!listconcat(CLXFeatures, CPXAdditionalFeatures);
// Cannonlake
list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
FeatureEVEX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
FeatureVLX,
FeaturePKU,
FeatureVBMI,
FeatureIFMA,
FeatureSHA];
list<SubtargetFeature> CNLTuning = [TuningFastGather,
TuningMacroFusion,
TuningSlow3OpsLEA,
TuningSlowDivide64,
TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
TuningFastSHLDRotate,
TuningFast15ByteNOP,
TuningFastVariableCrossLaneShuffle,
TuningFastVariablePerLaneShuffle,
TuningPrefer256Bit,
TuningInsertVZEROUPPER,
TuningAllowLight256Bit,
TuningNoDomainDelayMov,
TuningNoDomainDelayShuffle,
TuningNoDomainDelayBlend,
TuningFastImmVectorShift];
list<SubtargetFeature> CNLFeatures =
!listconcat(SKLFeatures, CNLAdditionalFeatures);
// Icelake
list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
FeatureVAES,
FeatureVBMI2,
FeatureVNNI,
FeatureVPCLMULQDQ,
FeatureVPOPCNTDQ,
FeatureGFNI,
FeatureRDPID,
FeatureFSRM];
list<SubtargetFeature> ICLTuning = [TuningFastGather,
TuningMacroFusion,
TuningSlowDivide64,
TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
TuningFastSHLDRotate,
TuningFast15ByteNOP,
TuningFastVariableCrossLaneShuffle,
TuningFastVariablePerLaneShuffle,
TuningPrefer256Bit,
TuningInsertVZEROUPPER,
TuningAllowLight256Bit,
TuningNoDomainDelayMov,
TuningNoDomainDelayShuffle,
TuningNoDomainDelayBlend,
TuningFastImmVectorShift];
list<SubtargetFeature> ICLFeatures =
!listconcat(CNLFeatures, ICLAdditionalFeatures);
// Icelake Server
list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
FeatureCLWB,
FeatureWBNOINVD];
list<SubtargetFeature> ICXTuning = ICLTuning;
list<SubtargetFeature> ICXFeatures =
!listconcat(ICLFeatures, ICXAdditionalFeatures);
// Tigerlake
list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
FeatureCLWB,
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureSHSTK];
list<SubtargetFeature> TGLTuning = ICLTuning;
list<SubtargetFeature> TGLFeatures =
!listconcat(ICLFeatures, TGLAdditionalFeatures );
// Sapphirerapids
list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
FeatureAMXINT8,
FeatureAMXBF16,
FeatureBF16,
FeatureSERIALIZE,
FeatureCLDEMOTE,
FeatureWAITPKG,
FeaturePTWRITE,
FeatureFP16,
FeatureAVXVNNI,
FeatureTSXLDTRK,
FeatureENQCMD,
FeatureSHSTK,
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureUINTR];
list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
TuningPERMFalseDeps,
TuningRANGEFalseDeps,
TuningGETMANTFalseDeps,
TuningMULLQFalseDeps];
list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
list<SubtargetFeature> SPRFeatures =
!listconcat(ICXFeatures, SPRAdditionalFeatures);
// Graniterapids
list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
FeaturePREFETCHI];
list<SubtargetFeature> GNRFeatures =
!listconcat(SPRFeatures, GNRAdditionalFeatures);
list<SubtargetFeature> GNRAdditionalTuning = [TuningBranchHint];
list<SubtargetFeature> GNRTuning = !listconcat(SPRTuning, GNRAdditionalTuning);
// Graniterapids D
list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
list<SubtargetFeature> GNRDFeatures =
!listconcat(GNRFeatures, GNRDAdditionalFeatures);
// Atom
list<SubtargetFeature> AtomFeatures = [FeatureX87,
FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
FeatureNOPL,
FeatureX86_64,
FeatureCX16,
FeatureMOVBE,
FeatureLAHFSAHF64];
list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
TuningSlowUAMem16,
TuningLEAForSP,
TuningSlowDivide32,
TuningSlowDivide64,
TuningSlowTwoMemOps,
TuningFastImm16,
TuningLEAUsesAG,
TuningPadShortFunctions,
TuningInsertVZEROUPPER,
TuningNoDomainDelay];
// Silvermont
list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
FeatureCRC32,
FeaturePOPCNT,
FeaturePCLMUL,
FeaturePRFCHW,
FeatureRDRAND];
list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
TuningSlowTwoMemOps,
TuningSlowLEA,
TuningSlowIncDec,
TuningSlowDivide64,
TuningSlowPMULLD,
TuningFast7ByteNOP,
TuningFastMOVBE,
TuningFastImm16,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER,
TuningNoDomainDelay];
list<SubtargetFeature> SLMFeatures =
!listconcat(AtomFeatures, SLMAdditionalFeatures);
// Goldmont
list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
FeatureSHA,
FeatureRDSEED,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT,
FeatureFSGSBase];
list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
TuningSlowTwoMemOps,
TuningSlowLEA,
TuningSlowIncDec,
TuningFastMOVBE,
TuningFastImm16,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER,
TuningNoDomainDelay];
list<SubtargetFeature> GLMFeatures =
!listconcat(SLMFeatures, GLMAdditionalFeatures);
// Goldmont Plus
list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
FeatureRDPID];
list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
TuningSlowTwoMemOps,
TuningSlowLEA,
TuningSlowIncDec,
TuningFastMOVBE,
TuningFastImm16,
TuningInsertVZEROUPPER,
TuningNoDomainDelay];
list<SubtargetFeature> GLPFeatures =
!listconcat(GLMFeatures, GLPAdditionalFeatures);
// Tremont
list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
FeatureGFNI];
list<SubtargetFeature> TRMTuning = GLPTuning;
list<SubtargetFeature> TRMFeatures =
!listconcat(GLPFeatures, TRMAdditionalFeatures);
// Alderlake
list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
FeaturePCONFIG,
FeatureSHSTK,
FeatureWIDEKL,
FeatureINVPCID,
FeatureADX,
FeatureFMA,
FeatureVAES,
FeatureVPCLMULQDQ,
FeatureF16C,
FeatureBMI,
FeatureBMI2,
FeatureLZCNT,
FeatureAVXVNNI,
FeaturePKU,
FeatureHRESET,
FeatureCLDEMOTE,
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureWAITPKG];
list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
TuningPreferMovmskOverVTest,
TuningFastImmVectorShift];
list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
list<SubtargetFeature> ADLFeatures =
!listconcat(TRMFeatures, ADLAdditionalFeatures);
// Gracemont
list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
TuningSlow3OpsLEA,
TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
TuningFast15ByteNOP,
TuningFastVariablePerLaneShuffle,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER];
// Sierraforest
list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
FeatureAVXIFMA,
FeatureAVXNECONVERT,
FeatureENQCMD,
FeatureUINTR,
FeatureAVXVNNIINT8];
list<SubtargetFeature> SRFFeatures =
!listconcat(ADLFeatures, SRFAdditionalFeatures);
// Arrowlake S
list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
FeatureSHA512,
FeatureSM3,
FeatureSM4];
list<SubtargetFeature> ARLSFeatures =
!listconcat(SRFFeatures, ARLSAdditionalFeatures);
// Pantherlake
list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
list<SubtargetFeature> PTLFeatures =
!listconcat(ARLSFeatures, PTLAdditionalFeatures);
// Clearwaterforest
list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
FeatureUSERMSR];
list<SubtargetFeature> CWFFeatures =
!listconcat(ARLSFeatures, CWFAdditionalFeatures);
// Knights Landing
list<SubtargetFeature> KNLFeatures = [FeatureX87,
FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureFXSR,
FeatureNOPL,
FeatureX86_64,
FeatureCX16,
FeatureCRC32,
FeaturePOPCNT,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureLAHFSAHF64,
FeatureAES,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureAVX512,
FeatureEVEX512,
FeatureCDI,
FeatureADX,
FeatureRDSEED,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureFMA,
FeaturePRFCHW];
list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
TuningSlow3OpsLEA,
TuningSlowIncDec,
TuningSlowTwoMemOps,
TuningPreferMaskRegisters,
TuningFastGather,
TuningFastMOVBE,
TuningFastImm16,
TuningSlowPMADDWD];
// TODO Add AVX5124FMAPS/AVX5124VNNIW features
list<SubtargetFeature> KNMFeatures =
!listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
// Barcelona
list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
FeatureCX8,
FeatureSSE4A,
FeatureFXSR,
FeatureNOPL,
FeatureCX16,
FeaturePRFCHW,
FeatureLZCNT,
FeaturePOPCNT,
FeatureLAHFSAHF64,
FeatureCMOV,
FeatureX86_64];
list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
TuningSlowDivide64,
TuningSlowSHLD,
TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
// Bobcat
list<SubtargetFeature> BtVer1Features = [FeatureX87,
FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureSSE4A,
FeatureFXSR,
FeatureNOPL,
FeatureX86_64,
FeatureCX16,
FeaturePRFCHW,
FeatureLZCNT,
FeaturePOPCNT,
FeatureLAHFSAHF64];
list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
TuningFastScalarShiftMasks,
TuningFastVectorShiftMasks,
TuningSlowDivide64,
TuningSlowSHLD,
TuningFastImm16,
TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
// Jaguar
list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
FeatureAES,
FeatureCRC32,
FeaturePCLMUL,
FeatureBMI,
FeatureF16C,
FeatureMOVBE,
FeatureXSAVE,
FeatureXSAVEOPT];
list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
TuningFastBEXTR,
TuningFastHorizontalOps,
TuningFast15ByteNOP,
TuningFastScalarShiftMasks,
TuningFastVectorShiftMasks,
TuningFastMOVBE,
TuningFastImm16,
TuningSBBDepBreaking,
TuningSlowDivide64,
TuningSlowSHLD];
list<SubtargetFeature> BtVer2Features =
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
// Bulldozer
list<SubtargetFeature> BdVer1Features = [FeatureX87,
FeatureCX8,
FeatureCMOV,
FeatureXOP,
FeatureX86_64,
FeatureCX16,
FeatureAES,
FeatureCRC32,
FeaturePRFCHW,
FeaturePCLMUL,
FeatureMMX,
FeatureFXSR,
FeatureNOPL,
FeatureLZCNT,
FeaturePOPCNT,
FeatureXSAVE,
FeatureLWP,
FeatureLAHFSAHF64];
list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
TuningSlowDivide64,
TuningFast11ByteNOP,
TuningFastScalarShiftMasks,
TuningBranchFusion,
TuningSBBDepBreaking,
TuningInsertVZEROUPPER];
// PileDriver
list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
FeatureBMI,
FeatureTBM,
FeatureFMA];
list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
TuningFastMOVBE];
list<SubtargetFeature> BdVer2Tuning =
!listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
list<SubtargetFeature> BdVer2Features =
!listconcat(BdVer1Features, BdVer2AdditionalFeatures);
// Steamroller
list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
FeatureFSGSBase];
list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
list<SubtargetFeature> BdVer3Features =
!listconcat(BdVer2Features, BdVer3AdditionalFeatures);
// Excavator
list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
FeatureBMI2,
FeatureMOVBE,
FeatureRDRAND,
FeatureMWAITX];
list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
list<SubtargetFeature> BdVer4Features =
!listconcat(BdVer3Features, BdVer4AdditionalFeatures);
// AMD Zen Processors common ISAs
list<SubtargetFeature> ZNFeatures = [FeatureADX,
FeatureAES,
FeatureAVX2,
FeatureBMI,
FeatureBMI2,
FeatureCLFLUSHOPT,
FeatureCLZERO,
FeatureCMOV,
FeatureX86_64,
FeatureCX16,
FeatureCRC32,
FeatureF16C,
FeatureFMA,
FeatureFSGSBase,
FeatureFXSR,
FeatureNOPL,
FeatureLAHFSAHF64,
FeatureLZCNT,
FeatureMMX,
FeatureMOVBE,
FeatureMWAITX,
FeaturePCLMUL,
FeaturePOPCNT,
FeaturePRFCHW,
FeatureRDRAND,
FeatureRDSEED,
FeatureSHA,
FeatureSSE4A,
FeatureX87,
FeatureXSAVE,
FeatureXSAVEC,
FeatureXSAVEOPT,
FeatureXSAVES];
list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
TuningFastBEXTR,
TuningFast15ByteNOP,
TuningBranchFusion,
TuningFastScalarFSQRT,
TuningFastVectorFSQRT,
TuningFastScalarShiftMasks,
TuningFastVariablePerLaneShuffle,
TuningFastMOVBE,
TuningFastImm16,
TuningSlowDivide64,
TuningSlowSHLD,
TuningSBBDepBreaking,
TuningInsertVZEROUPPER,
TuningAllowLight256Bit];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
FeatureRDPID,
FeatureRDPRU,
FeatureWBNOINVD];
list<SubtargetFeature> ZN2Tuning = ZNTuning;
list<SubtargetFeature> ZN2Features =
!listconcat(ZNFeatures, ZN2AdditionalFeatures);
list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
FeatureINVPCID,
FeaturePKU,
FeatureVAES,
FeatureVPCLMULQDQ];
list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
list<SubtargetFeature> ZN3Tuning =
!listconcat(ZN2Tuning, ZN3AdditionalTuning);
list<SubtargetFeature> ZN3Features =
!listconcat(ZN2Features, ZN3AdditionalFeatures);
list<SubtargetFeature> ZN4AdditionalTuning = [TuningFastDPWSSD];
list<SubtargetFeature> ZN4Tuning =
!listconcat(ZN3Tuning, ZN4AdditionalTuning);
list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
FeatureEVEX512,
FeatureCDI,
FeatureDQI,
FeatureBWI,
FeatureVLX,
FeatureVBMI,
FeatureVBMI2,
FeatureIFMA,
FeatureVNNI,
FeatureBITALG,
FeatureGFNI,
FeatureBF16,
FeatureSHSTK,
FeatureVPOPCNTDQ];
list<SubtargetFeature> ZN4Features =
!listconcat(ZN3Features, ZN4AdditionalFeatures);
+
+ list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
+ list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,
+ FeatureMOVDIRI,
+ FeatureMOVDIR64B,
+ FeatureVP2INTERSECT,
+ FeaturePREFETCHI,
+ FeatureAVXVNNI];
+ list<SubtargetFeature> ZN5Features =
+ !listconcat(ZN4Features, ZN5AdditionalFeatures);
+
}
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
class Proc<string Name, list<SubtargetFeature> Features,
list<SubtargetFeature> TuneFeatures>
: ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
class ProcModel<string Name, SchedMachineModel Model,
list<SubtargetFeature> Features,
list<SubtargetFeature> TuneFeatures>
: ProcessorModel<Name, Model, Features, TuneFeatures>;
// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
// if i386/i486 is specifically requested.
// NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
// constructor checks that any CPU used in 64-bit mode has FeatureX86_64
// enabled. It has no effect on code generation.
// NOTE: As a default tuning, "generic" aims to produce code optimized for the
// most common X86 processors. The tunings might change over time. For
// consistency, it is recommended to set "tune-cpu"="x86-64" in the function
// attributes.
def : ProcModel<"generic", SandyBridgeModel,
[FeatureX87, FeatureCX8, FeatureX86_64],
[TuningSlow3OpsLEA,
TuningSlowDivide64,
TuningMacroFusion,
TuningFastScalarFSQRT,
TuningFast15ByteNOP,
TuningInsertVZEROUPPER]>;
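// A minimal IR sketch of the note above ("target-cpu"/"tune-cpu" are real
// function attributes; the function itself is hypothetical):
//   define void @f() "target-cpu"="x86-64" "tune-cpu"="x86-64" {
//     ret void
//   }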
def : Proc<"i386", [FeatureX87],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i486", [FeatureX87],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i586", [FeatureX87, FeatureCX8],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium", [FeatureX87, FeatureCX8],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium-mmx", "pentium_mmx"] in {
def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentiumpro", "pentium_pro"] in {
def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium2", "pentium_ii"] in {
def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
FeatureFXSR, FeatureNOPL],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4, which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is specified. This generally gives a nice performance
// increase on silvermont, with largely neutral behavior on other
// contemporary large core processors.
// pentium-m, pentium4m, prescott and nocona are included as a preventative
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.
foreach P = ["pentium_m", "pentium-m"] in {
def : ProcModel<P, GenericPostRAModel,
[FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
def : ProcModel<P, GenericPostRAModel,
[FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// Intel Quark.
def : Proc<"lakemont", [FeatureCX8],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// NetBurst.
foreach P = ["prescott", "pentium_4_sse3"] in {
def : ProcModel<P, GenericPostRAModel,
[FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
def : ProcModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureSSE3,
FeatureFXSR,
FeatureNOPL,
FeatureX86_64,
FeatureCX16,
],
[
TuningSlowUAMem16,
TuningInsertVZEROUPPER
]>;
// Intel Core 2 Solo/Duo.
foreach P = ["core2", "core_2_duo_ssse3"] in {
def : ProcModel<P, SandyBridgeModel, [
FeatureX87,
FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
FeatureNOPL,
FeatureX86_64,
FeatureCX16,
FeatureLAHFSAHF64
],
[
TuningMacroFusion,
TuningSlowUAMem16,
TuningInsertVZEROUPPER
]>;
}
foreach P = ["penryn", "core_2_duo_sse4_1"] in {
def : ProcModel<P, SandyBridgeModel, [
FeatureX87,
FeatureCX8,
FeatureCMOV,
FeatureMMX,
FeatureSSE41,
FeatureFXSR,
FeatureNOPL,
FeatureX86_64,
FeatureCX16,
FeatureLAHFSAHF64
],
[
TuningMacroFusion,
TuningSlowUAMem16,
TuningInsertVZEROUPPER
]>;
}
// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
def : ProcModel<P, AtomModel, ProcessorFeatures.AtomFeatures,
ProcessorFeatures.AtomTuning>;
}
foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
def : ProcModel<P, SLMModel, ProcessorFeatures.SLMFeatures,
ProcessorFeatures.SLMTuning>;
}
def : ProcModel<"atom_sse4_2_movbe", SLMModel, ProcessorFeatures.GLMFeatures,
ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures,
ProcessorFeatures.GLMTuning>;
foreach P = ["goldmont_plus", "goldmont-plus"] in {
def : ProcModel<P, SLMModel, ProcessorFeatures.GLPFeatures,
ProcessorFeatures.GLPTuning>;
}
def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
ProcessorFeatures.TRMTuning>;
// "Arrandale" along with corei3 and corei5
foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures,
ProcessorFeatures.NHMTuning>;
}
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
foreach P = ["westmere", "core_aes_pclmulqdq"] in {
def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.WSMFeatures,
ProcessorFeatures.WSMTuning>;
}
foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
ProcessorFeatures.SNBTuning>;
}
foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
ProcessorFeatures.IVBTuning>;
}
foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
ProcessorFeatures.HSWTuning>;
}
foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
def : ProcModel<P, BroadwellModel, ProcessorFeatures.BDWFeatures,
ProcessorFeatures.BDWTuning>;
}
def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
ProcessorFeatures.SKLTuning>;
// FIXME: define KNL scheduler model
foreach P = ["knl", "mic_avx512"] in {
def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
ProcessorFeatures.KNLTuning>;
}
def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
ProcessorFeatures.KNLTuning>;
foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
ProcessorFeatures.SKXTuning>;
}
def : ProcModel<"cascadelake", SkylakeServerModel,
ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake", SkylakeServerModel,
ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
foreach P = ["icelake-client", "icelake_client"] in {
def : ProcModel<P, IceLakeModel,
ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
}
def : ProcModel<"rocketlake", IceLakeModel,
ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
foreach P = ["icelake-server", "icelake_server"] in {
def : ProcModel<P, IceLakeModel,
ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
}
def : ProcModel<"tigerlake", IceLakeModel,
ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SapphireRapidsModel,
ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
// FIXME: Use Gracemont Schedule Model when it is ready.
def : ProcModel<"gracemont", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
foreach P = ["sierraforest", "grandridge"] in {
def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures,
ProcessorFeatures.GRTTuning>;
}
def : ProcModel<"raptorlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"meteorlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"arrowlake", AlderlakePModel,
ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
def : ProcModel<P, AlderlakePModel,
ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
}
def : ProcModel<"pantherlake", AlderlakePModel,
ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"clearwaterforest", AlderlakePModel,
ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
def : ProcModel<"graniterapids", SapphireRapidsModel,
ProcessorFeatures.GNRFeatures, ProcessorFeatures.GNRTuning>;
foreach P = ["graniterapids-d", "graniterapids_d"] in {
def : ProcModel<P, SapphireRapidsModel,
ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>;
}
// AMD CPUs.
def : Proc<"k6", [FeatureX87, FeatureCX8, FeatureMMX],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-2", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-3", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["athlon", "athlon-tbird"] in {
def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeaturePRFCHW,
FeatureNOPL],
[TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
FeatureSSE1, FeatureMMX, FeaturePRFCHW, FeatureFXSR, FeatureNOPL],
[TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, FeatureMMX, FeaturePRFCHW,
FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
[TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, FeatureMMX, FeaturePRFCHW,
FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
FeatureX86_64],
[TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
}
foreach P = ["amdfam10", "barcelona"] in {
def : Proc<P, ProcessorFeatures.BarcelonaFeatures,
ProcessorFeatures.BarcelonaTuning>;
}
// Bobcat
def : Proc<"btver1", ProcessorFeatures.BtVer1Features,
ProcessorFeatures.BtVer1Tuning>;
// Jaguar
def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features,
ProcessorFeatures.BtVer2Tuning>;
// Bulldozer
def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
ProcessorFeatures.BdVer2Tuning>;
// Steamroller
def : Proc<"bdver3", ProcessorFeatures.BdVer3Features,
ProcessorFeatures.BdVer3Tuning>;
// Excavator
def : Proc<"bdver4", ProcessorFeatures.BdVer4Features,
ProcessorFeatures.BdVer4Tuning>;
def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
ProcessorFeatures.ZN4Tuning>;
+def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
+ ProcessorFeatures.ZN5Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip2", [FeatureX87, FeatureMMX, FeaturePRFCHW],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3", [FeatureX87, FeatureMMX, FeaturePRFCHW],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3-2", [FeatureX87, FeatureCX8, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
// modern 64-bit x86 chip, and enables features that are generally beneficial.
//
// We currently use the Sandy Bridge model as the default scheduling model as
// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
ProcessorFeatures.X86_64V1Tuning>;
// Close to Sandybridge.
def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
ProcessorFeatures.X86_64V2Tuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
ProcessorFeatures.X86_64V3Tuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
ProcessorFeatures.X86_64V4Tuning>;
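// These level definitions are what -march=x86-64-v{2,3,4} (clang) and
// -mcpu (llc) select; illustrative invocations, assuming inputs foo.c/foo.ll:
//   clang -O2 -march=x86-64-v3 foo.c
//   llc -mcpu=x86-64-v2 foo.ll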
//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//
include "X86CallingConv.td"
//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//
def ATTAsmParserVariant : AsmParserVariant {
int Variant = 0;
// Variant name.
string Name = "att";
// Discard comments in assembly strings.
string CommentDelimiter = "#";
// Recognize hard coded registers.
string RegisterPrefix = "%";
}
def IntelAsmParserVariant : AsmParserVariant {
int Variant = 1;
// Variant name.
string Name = "intel";
// Discard comments in assembly strings.
string CommentDelimiter = ";";
// Recognize hard coded registers.
string RegisterPrefix = "";
}
//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//
// The X86 target supports two different assembly syntaxes for its output.
// This is controlled by the -x86-asm-syntax={att|intel} flag.
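// An illustrative invocation, assuming an input foo.ll (the default output
// remains AT&T syntax):
//   llc -x86-asm-syntax=intel foo.ll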
def ATTAsmWriter : AsmWriter {
string AsmWriterClassName = "ATTInstPrinter";
int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
string AsmWriterClassName = "IntelInstPrinter";
int Variant = 1;
}
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
let AllowRegisterRenaming = 1;
}
//===----------------------------------------------------------------------===//
// Pfm Counters
//===----------------------------------------------------------------------===//
include "X86PfmCounters.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td b/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
index 2b1dac411c99..c30e989cdc2a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
@@ -1,352 +1,353 @@
//===-- X86PfmCounters.td - X86 Hardware Counters ----------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This describes the available hardware counters for various subtargets.
//
//===----------------------------------------------------------------------===//
def UnhaltedCoreCyclesPfmCounter : PfmCounter<"unhalted_core_cycles">;
def UopsIssuedPfmCounter : PfmCounter<"uops_issued:any">;
// No default counters on X86.
def DefaultPfmCounters : ProcPfmCounters {}
def : PfmCountersDefaultBinding<DefaultPfmCounters>;
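// These bindings are consumed by tools such as llvm-exegesis, which resolves
// the libpfm4 event names at run time; an illustrative invocation:
//   llvm-exegesis -mode=latency -opcode-name=ADD64rr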
// Intel X86 Counters.
defvar DefaultIntelPfmValidationCounters = [
PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">,
PfmValidationCounter<L1DCacheLoadMiss, "MEM_LOAD_UOPS_RETIRED:L1_MISS">,
PfmValidationCounter<L1ICacheLoadMiss, "L1-ICACHE-LOAD-MISSES">,
PfmValidationCounter<DataTLBLoadMiss, "DTLB_LOAD_MISSES:MISS_CAUSES_A_WALK">,
PfmValidationCounter<DataTLBStoreMiss, "DTLB_STORE_MISSES:MISS_CAUSES_A_WALK">,
PfmValidationCounter<InstructionTLBLoadMiss, "ITLB_MISSES:MISS_CAUSES_A_WALK">,
PfmValidationCounter<BranchPredictionMiss, "BRANCH-MISSES">
];
def PentiumPfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
let UopsCounter = PfmCounter<"uops_retired">;
}
def : PfmCountersBinding<"pentiumpro", PentiumPfmCounters>;
def : PfmCountersBinding<"pentium2", PentiumPfmCounters>;
def : PfmCountersBinding<"pentium3", PentiumPfmCounters>;
def : PfmCountersBinding<"pentium3m", PentiumPfmCounters>;
def : PfmCountersBinding<"pentium-m", PentiumPfmCounters>;
def CorePfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = PfmCounter<"uops_retired:any">;
}
def : PfmCountersBinding<"yonah", CorePfmCounters>;
def : PfmCountersBinding<"prescott", CorePfmCounters>;
def AtomPfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = PfmCounter<"uops_retired:any">;
}
def : PfmCountersBinding<"bonnell", AtomPfmCounters>;
def : PfmCountersBinding<"atom", AtomPfmCounters>;
def SLMPfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = PfmCounter<"uops_retired:any">;
let IssueCounters = [
PfmIssueCounter<"SLM_MEC_RSV", "mem_uop_retired:any_ld + mem_uop_retired:any_st">
];
}
def : PfmCountersBinding<"silvermont", SLMPfmCounters>;
def : PfmCountersBinding<"goldmont", SLMPfmCounters>;
def : PfmCountersBinding<"goldmont-plus", SLMPfmCounters>;
def : PfmCountersBinding<"tremont", SLMPfmCounters>;
def KnightPfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = PfmCounter<"uops_retired:all">;
}
def : PfmCountersBinding<"knl", KnightPfmCounters>;
def : PfmCountersBinding<"knm", KnightPfmCounters>;
def Core2PfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = PfmCounter<"uops_retired:any">;
let IssueCounters = [
PfmIssueCounter<"SBPort0", "rs_uops_dispatched_cycles:port_0">,
PfmIssueCounter<"SBPort1", "rs_uops_dispatched_cycles:port_1">,
PfmIssueCounter<"SBPort23", "rs_uops_dispatched_cycles:port_2 + rs_uops_dispatched_cycles:port_3">,
PfmIssueCounter<"SBPort4", "rs_uops_dispatched_cycles:port_4">,
PfmIssueCounter<"SBPort5", "rs_uops_dispatched_cycles:port_5">
];
}
def : PfmCountersBinding<"core2", Core2PfmCounters>;
def : PfmCountersBinding<"penryn", Core2PfmCounters>;
def NehalemPfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = PfmCounter<"uops_retired:any">;
let IssueCounters = [
PfmIssueCounter<"SBPort0", "uops_executed:port0">,
PfmIssueCounter<"SBPort1", "uops_executed:port1">,
PfmIssueCounter<"SBPort23", "uops_executed:port2_core + uops_executed:port3_core">,
PfmIssueCounter<"SBPort4", "uops_executed:port4_core">,
PfmIssueCounter<"SBPort5", "uops_executed:port5">
];
}
def : PfmCountersBinding<"nehalem", NehalemPfmCounters>;
def : PfmCountersBinding<"corei7", NehalemPfmCounters>;
def : PfmCountersBinding<"westmere", NehalemPfmCounters>;
def SandyBridgePfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = UopsIssuedPfmCounter;
let IssueCounters = [
PfmIssueCounter<"SBPort0", "uops_dispatched_port:port_0">,
PfmIssueCounter<"SBPort1", "uops_dispatched_port:port_1">,
PfmIssueCounter<"SBPort23", "uops_dispatched_port:port_2 + uops_dispatched_port:port_3">,
PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">,
PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5">
];
let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>;
def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>;
def HaswellPfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = UopsIssuedPfmCounter;
let IssueCounters = [
PfmIssueCounter<"HWPort0", "uops_executed_port:port_0">,
PfmIssueCounter<"HWPort1", "uops_executed_port:port_1">,
PfmIssueCounter<"HWPort2", "uops_executed_port:port_2">,
PfmIssueCounter<"HWPort3", "uops_executed_port:port_3">,
PfmIssueCounter<"HWPort4", "uops_executed_port:port_4">,
PfmIssueCounter<"HWPort5", "uops_executed_port:port_5">,
PfmIssueCounter<"HWPort6", "uops_executed_port:port_6">,
PfmIssueCounter<"HWPort7", "uops_executed_port:port_7">
];
let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"haswell", HaswellPfmCounters>;
def BroadwellPfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = UopsIssuedPfmCounter;
let IssueCounters = [
PfmIssueCounter<"BWPort0", "uops_executed_port:port_0">,
PfmIssueCounter<"BWPort1", "uops_executed_port:port_1">,
PfmIssueCounter<"BWPort2", "uops_executed_port:port_2">,
PfmIssueCounter<"BWPort3", "uops_executed_port:port_3">,
PfmIssueCounter<"BWPort4", "uops_executed_port:port_4">,
PfmIssueCounter<"BWPort5", "uops_executed_port:port_5">,
PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">,
PfmIssueCounter<"BWPort7", "uops_executed_port:port_7">
];
let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>;
def SkylakeClientPfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = UopsIssuedPfmCounter;
let IssueCounters = [
PfmIssueCounter<"SKLPort0", "uops_dispatched_port:port_0">,
PfmIssueCounter<"SKLPort1", "uops_dispatched_port:port_1">,
PfmIssueCounter<"SKLPort2", "uops_dispatched_port:port_2">,
PfmIssueCounter<"SKLPort3", "uops_dispatched_port:port_3">,
PfmIssueCounter<"SKLPort4", "uops_dispatched_port:port_4">,
PfmIssueCounter<"SKLPort5", "uops_dispatched_port:port_5">,
PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">,
PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7">
];
let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>;
def SkylakeServerPfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = UopsIssuedPfmCounter;
let IssueCounters = [
PfmIssueCounter<"SKXPort0", "uops_dispatched_port:port_0">,
PfmIssueCounter<"SKXPort1", "uops_dispatched_port:port_1">,
PfmIssueCounter<"SKXPort2", "uops_dispatched_port:port_2">,
PfmIssueCounter<"SKXPort3", "uops_dispatched_port:port_3">,
PfmIssueCounter<"SKXPort4", "uops_dispatched_port:port_4">,
PfmIssueCounter<"SKXPort5", "uops_dispatched_port:port_5">,
PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">,
PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7">
];
let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>;
def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>;
def : PfmCountersBinding<"cannonlake", SkylakeServerPfmCounters>;
def IceLakePfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = UopsIssuedPfmCounter;
let IssueCounters = [
PfmIssueCounter<"ICXPort0", "uops_dispatched_port:port_0">,
PfmIssueCounter<"ICXPort1", "uops_dispatched_port:port_1">,
PfmIssueCounter<"ICXPort23", "uops_dispatched_port:port_2_3">,
PfmIssueCounter<"ICXPort49", "uops_dispatched_port:port_4_9">,
PfmIssueCounter<"ICXPort5", "uops_dispatched_port:port_5">,
PfmIssueCounter<"ICXPort6", "uops_dispatched_port:port_6">,
PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8">
];
let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"icelake-client", IceLakePfmCounters>;
def : PfmCountersBinding<"icelake-server", IceLakePfmCounters>;
def : PfmCountersBinding<"rocketlake", IceLakePfmCounters>;
def : PfmCountersBinding<"tigerlake", IceLakePfmCounters>;
def AlderLakePfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = UopsIssuedPfmCounter;
let IssueCounters = [
PfmIssueCounter<"ADLPPort00", "uops_dispatched_port:port_0">,
PfmIssueCounter<"ADLPPort01", "uops_dispatched_port:port_1">,
PfmIssueCounter<"ADLPPort02_03_10", "uops_dispatched_port:port_2_3_10">,
PfmIssueCounter<"ADLPPort04_09", "uops_dispatched_port:port_4_9">,
PfmIssueCounter<"ADLPPort05_11", "uops_dispatched_port:port_5_11">,
PfmIssueCounter<"ADLPPort06", "uops_dispatched_port:port_6">,
PfmIssueCounter<"ADLPPort07_08", "uops_dispatched_port:port_7_8">
];
let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"alderlake", AlderLakePfmCounters>;
// AMD X86 Counters.
defvar DefaultAMDPfmValidationCounters = [
PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">,
PfmValidationCounter<L1DCacheLoadMiss, "L1-DCACHE-LOAD-MISSES">,
PfmValidationCounter<L1DCacheStoreMiss, "L1-DCACHE-STORE-MISSES">,
PfmValidationCounter<L1ICacheLoadMiss, "L1-ICACHE-LOAD-MISSES">,
PfmValidationCounter<DataTLBLoadMiss, "DTLB-LOAD-MISSES">,
PfmValidationCounter<InstructionTLBLoadMiss, "ITLB-LOAD-MISSES">,
PfmValidationCounter<BranchPredictionMiss, "BRANCH-MISSES">
];
// Set basic counters for AMD cpus that we know libpfm4 supports.
def DefaultAMDPfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
let UopsCounter = PfmCounter<"retired_uops">;
}
def : PfmCountersBinding<"athlon", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"athlon-tbird", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"athlon-4", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"athlon-xp", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"athlon-mp", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"k8", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"opteron", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"athlon64", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"athlon-fx", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"k8-sse3", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"opteron-sse3", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"athlon64-sse3", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"amdfam10", DefaultAMDPfmCounters>;
def : PfmCountersBinding<"barcelona", DefaultAMDPfmCounters>;
def BdVer2PfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
let UopsCounter = PfmCounter<"retired_uops">;
let IssueCounters = [
PfmIssueCounter<"PdFPU0", "dispatched_fpu_ops:ops_pipe0 + dispatched_fpu_ops:ops_dual_pipe0">,
PfmIssueCounter<"PdFPU1", "dispatched_fpu_ops:ops_pipe1 + dispatched_fpu_ops:ops_dual_pipe1">,
PfmIssueCounter<"PdFPU2", "dispatched_fpu_ops:ops_pipe2 + dispatched_fpu_ops:ops_dual_pipe2">,
PfmIssueCounter<"PdFPU3", "dispatched_fpu_ops:ops_pipe3 + dispatched_fpu_ops:ops_dual_pipe3">
];
}
def : PfmCountersBinding<"bdver1", BdVer2PfmCounters>;
def : PfmCountersBinding<"bdver2", BdVer2PfmCounters>;
def BdVer3PfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
let UopsCounter = PfmCounter<"retired_uops">;
let IssueCounters = [
PfmIssueCounter<"SrFPU0", "dispatched_fpu_ops:ops_pipe0 + dispatched_fpu_ops:ops_dual_pipe0">,
PfmIssueCounter<"SrFPU1", "dispatched_fpu_ops:ops_pipe1 + dispatched_fpu_ops:ops_dual_pipe1">,
PfmIssueCounter<"SrFPU2", "dispatched_fpu_ops:ops_pipe2 + dispatched_fpu_ops:ops_dual_pipe2">
];
}
def : PfmCountersBinding<"bdver3", BdVer3PfmCounters>;
def : PfmCountersBinding<"bdver4", BdVer3PfmCounters>;
def BtVer1PfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
let UopsCounter = PfmCounter<"retired_uops">;
let IssueCounters = [
PfmIssueCounter<"BtFPU0", "dispatched_fpu:pipe0">,
PfmIssueCounter<"BtFPU1", "dispatched_fpu:pipe1">
];
}
def : PfmCountersBinding<"btver1", BtVer1PfmCounters>;
def BtVer2PfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
let UopsCounter = PfmCounter<"retired_uops">;
let IssueCounters = [
PfmIssueCounter<"JFPU0", "dispatched_fpu:pipe0">,
PfmIssueCounter<"JFPU1", "dispatched_fpu:pipe1">
];
}
def : PfmCountersBinding<"btver2", BtVer2PfmCounters>;
def ZnVer1PfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cycles_not_in_halt">;
let UopsCounter = PfmCounter<"retired_uops">;
let IssueCounters = [
PfmIssueCounter<"ZnFPU0", "fpu_pipe_assignment:total0">,
PfmIssueCounter<"ZnFPU1", "fpu_pipe_assignment:total1">,
PfmIssueCounter<"ZnFPU2", "fpu_pipe_assignment:total2">,
PfmIssueCounter<"ZnFPU3", "fpu_pipe_assignment:total3">,
PfmIssueCounter<"ZnAGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">,
PfmIssueCounter<"ZnDivider", "div_op_count">
];
let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>;
def ZnVer2PfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cycles_not_in_halt">;
let UopsCounter = PfmCounter<"retired_uops">;
let IssueCounters = [
PfmIssueCounter<"Zn2AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">,
PfmIssueCounter<"Zn2Divider", "div_op_count">
];
let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver2", ZnVer2PfmCounters>;
def ZnVer3PfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cycles_not_in_halt">;
let UopsCounter = PfmCounter<"retired_ops">;
let IssueCounters = [
PfmIssueCounter<"Zn3Int", "ops_type_dispatched_from_decoder:int_disp_retire_mode">,
PfmIssueCounter<"Zn3FPU", "ops_type_dispatched_from_decoder:fp_disp_retire_mode">,
PfmIssueCounter<"Zn3Load", "ls_dispatch:ld_dispatch">,
PfmIssueCounter<"Zn3Store", "ls_dispatch:store_dispatch">,
PfmIssueCounter<"Zn3Divider", "div_op_count">
];
let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>;
def ZnVer4PfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cycles_not_in_halt">;
let UopsCounter = PfmCounter<"retired_ops">;
let IssueCounters = [
PfmIssueCounter<"Zn4Int", "ops_type_dispatched_from_decoder:int_disp_retire_mode">,
PfmIssueCounter<"Zn4FPU", "ops_type_dispatched_from_decoder:fp_disp_retire_mode">,
PfmIssueCounter<"Zn4Load", "ls_dispatch:ld_dispatch">,
PfmIssueCounter<"Zn4Store", "ls_dispatch:store_dispatch">,
PfmIssueCounter<"Zn4Divider", "div_op_count">,
PfmIssueCounter<"Zn4AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">
];
let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
+def : PfmCountersBinding<"znver5", ZnVer4PfmCounters>;
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
index 7e637cba4cfb..865b6a44adbb 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp
@@ -1,2151 +1,2170 @@
//===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the operating system Host detection.
//
//===----------------------------------------------------------------------===//
#include "llvm/TargetParser/Host.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/TargetParser/X86TargetParser.h"
#include <string.h>
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/Host.inc"
#include <sched.h>
#endif
#ifdef _WIN32
#include "Windows/Host.inc"
#endif
#ifdef _MSC_VER
#include <intrin.h>
#endif
#ifdef __MVS__
#include "llvm/Support/BCD.h"
#endif
#if defined(__APPLE__)
#include <mach/host_info.h>
#include <mach/mach.h>
#include <mach/mach_host.h>
#include <mach/machine.h>
#include <sys/param.h>
#include <sys/sysctl.h>
#endif
#ifdef _AIX
#include <sys/systemcfg.h>
#endif
#if defined(__sun__) && defined(__svr4__)
#include <kstat.h>
#endif
#define DEBUG_TYPE "host-detection"
//===----------------------------------------------------------------------===//
//
// Implementations of the CPU detection routines
//
//===----------------------------------------------------------------------===//
using namespace llvm;
static std::unique_ptr<llvm::MemoryBuffer>
LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
if (std::error_code EC = Text.getError()) {
llvm::errs() << "Can't read "
<< "/proc/cpuinfo: " << EC.message() << "\n";
return nullptr;
}
return std::move(*Text);
}
StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
// Access to the Processor Version Register (PVR) on PowerPC is privileged,
// and so we must use an operating-system interface to determine the current
// processor type. On Linux, this is exposed through the /proc/cpuinfo file.
const char *generic = "generic";
// The cpu line is second (after the 'processor: 0' line), so if this
// buffer is too small then something has changed (or is wrong).
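// An illustrative excerpt (field values vary by machine):
//   processor : 0
//   cpu       : POWER9 (architected), altivec supported
// from which the scanner below extracts "POWER9" (up to a space or comma).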
StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
StringRef::const_iterator CIP = CPUInfoStart;
StringRef::const_iterator CPUStart = nullptr;
size_t CPULen = 0;
// We need to find the first line which starts with cpu, spaces, and a colon.
// After the colon, there may be some additional spaces and then the cpu type.
while (CIP < CPUInfoEnd && CPUStart == nullptr) {
if (CIP < CPUInfoEnd && *CIP == '\n')
++CIP;
if (CIP < CPUInfoEnd && *CIP == 'c') {
++CIP;
if (CIP < CPUInfoEnd && *CIP == 'p') {
++CIP;
if (CIP < CPUInfoEnd && *CIP == 'u') {
++CIP;
while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
++CIP;
if (CIP < CPUInfoEnd && *CIP == ':') {
++CIP;
while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
++CIP;
if (CIP < CPUInfoEnd) {
CPUStart = CIP;
while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
*CIP != ',' && *CIP != '\n'))
++CIP;
CPULen = CIP - CPUStart;
}
}
}
}
}
if (CPUStart == nullptr)
while (CIP < CPUInfoEnd && *CIP != '\n')
++CIP;
}
if (CPUStart == nullptr)
return generic;
return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
.Case("604e", "604e")
.Case("604", "604")
.Case("7400", "7400")
.Case("7410", "7400")
.Case("7447", "7400")
.Case("7455", "7450")
.Case("G4", "g4")
.Case("POWER4", "970")
.Case("PPC970FX", "970")
.Case("PPC970MP", "970")
.Case("G5", "g5")
.Case("POWER5", "g5")
.Case("A2", "a2")
.Case("POWER6", "pwr6")
.Case("POWER7", "pwr7")
.Case("POWER8", "pwr8")
.Case("POWER8E", "pwr8")
.Case("POWER8NVL", "pwr8")
.Case("POWER9", "pwr9")
.Case("POWER10", "pwr10")
.Case("POWER11", "pwr11")
// FIXME: If we get a simulator or machine with the capabilities of
// mcpu=future, we should revisit this and add the name reported by the
// simulator/machine.
.Default(generic);
}
StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
// The cpuid register on arm is not accessible from user space. On Linux,
// it is exposed through the /proc/cpuinfo file.
// Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
// in all cases.
SmallVector<StringRef, 32> Lines;
ProcCpuinfoContent.split(Lines, "\n");
// Look for the CPU implementer line.
StringRef Implementer;
StringRef Hardware;
StringRef Part;
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
if (Lines[I].starts_with("CPU implementer"))
Implementer = Lines[I].substr(15).ltrim("\t :");
if (Lines[I].starts_with("Hardware"))
Hardware = Lines[I].substr(8).ltrim("\t :");
if (Lines[I].starts_with("CPU part"))
Part = Lines[I].substr(8).ltrim("\t :");
}
if (Implementer == "0x41") { // ARM Ltd.
// MSM8992/8994 may report the CPU part of whichever core the kernel happens
// to be running on, which is nondeterministic and wrong. Always return
// cortex-a53 for these SoCs.
if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996"))
return "cortex-a53";
// The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
// values correspond to the "Part number" in the CP15/c0 register. The
// contents are specified in the various processor manuals.
// This corresponds to the Main ID Register in the Technical Reference
// Manuals, and is used in programs like sys-utils.
return StringSwitch<const char *>(Part)
.Case("0x926", "arm926ej-s")
.Case("0xb02", "mpcore")
.Case("0xb36", "arm1136j-s")
.Case("0xb56", "arm1156t2-s")
.Case("0xb76", "arm1176jz-s")
.Case("0xc05", "cortex-a5")
.Case("0xc07", "cortex-a7")
.Case("0xc08", "cortex-a8")
.Case("0xc09", "cortex-a9")
.Case("0xc0f", "cortex-a15")
.Case("0xc0e", "cortex-a17")
.Case("0xc20", "cortex-m0")
.Case("0xc23", "cortex-m3")
.Case("0xc24", "cortex-m4")
.Case("0xc27", "cortex-m7")
.Case("0xd20", "cortex-m23")
.Case("0xd21", "cortex-m33")
.Case("0xd24", "cortex-m52")
.Case("0xd22", "cortex-m55")
.Case("0xd23", "cortex-m85")
.Case("0xc18", "cortex-r8")
.Case("0xd13", "cortex-r52")
.Case("0xd16", "cortex-r52plus")
.Case("0xd15", "cortex-r82")
.Case("0xd14", "cortex-r82ae")
.Case("0xd02", "cortex-a34")
.Case("0xd04", "cortex-a35")
.Case("0xd03", "cortex-a53")
.Case("0xd05", "cortex-a55")
.Case("0xd46", "cortex-a510")
.Case("0xd80", "cortex-a520")
.Case("0xd88", "cortex-a520ae")
.Case("0xd07", "cortex-a57")
.Case("0xd06", "cortex-a65")
.Case("0xd43", "cortex-a65ae")
.Case("0xd08", "cortex-a72")
.Case("0xd09", "cortex-a73")
.Case("0xd0a", "cortex-a75")
.Case("0xd0b", "cortex-a76")
.Case("0xd0e", "cortex-a76ae")
.Case("0xd0d", "cortex-a77")
.Case("0xd41", "cortex-a78")
.Case("0xd42", "cortex-a78ae")
.Case("0xd4b", "cortex-a78c")
.Case("0xd47", "cortex-a710")
.Case("0xd4d", "cortex-a715")
.Case("0xd81", "cortex-a720")
.Case("0xd89", "cortex-a720ae")
.Case("0xd87", "cortex-a725")
.Case("0xd44", "cortex-x1")
.Case("0xd4c", "cortex-x1c")
.Case("0xd48", "cortex-x2")
.Case("0xd4e", "cortex-x3")
.Case("0xd82", "cortex-x4")
.Case("0xd85", "cortex-x925")
.Case("0xd4a", "neoverse-e1")
.Case("0xd0c", "neoverse-n1")
.Case("0xd49", "neoverse-n2")
.Case("0xd8e", "neoverse-n3")
.Case("0xd40", "neoverse-v1")
.Case("0xd4f", "neoverse-v2")
.Case("0xd84", "neoverse-v3")
.Case("0xd83", "neoverse-v3ae")
.Default("generic");
}
if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
return StringSwitch<const char *>(Part)
.Case("0x516", "thunderx2t99")
.Case("0x0516", "thunderx2t99")
.Case("0xaf", "thunderx2t99")
.Case("0x0af", "thunderx2t99")
.Case("0xa1", "thunderxt88")
.Case("0x0a1", "thunderxt88")
.Default("generic");
}
if (Implementer == "0x46") { // Fujitsu Ltd.
return StringSwitch<const char *>(Part)
.Case("0x001", "a64fx")
.Default("generic");
}
if (Implementer == "0x4e") { // NVIDIA Corporation
return StringSwitch<const char *>(Part)
.Case("0x004", "carmel")
.Default("generic");
}
if (Implementer == "0x48") // HiSilicon Technologies, Inc.
// The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
// values correspond to the "Part number" in the CP15/c0 register. The
// contents are specified in the various processor manuals.
return StringSwitch<const char *>(Part)
.Case("0xd01", "tsv110")
.Default("generic");
if (Implementer == "0x51") // Qualcomm Technologies, Inc.
// The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
// values correspond to the "Part number" in the CP15/c0 register. The
// contents are specified in the various processor manuals.
return StringSwitch<const char *>(Part)
.Case("0x06f", "krait") // APQ8064
.Case("0x201", "kryo")
.Case("0x205", "kryo")
.Case("0x211", "kryo")
.Case("0x800", "cortex-a73") // Kryo 2xx Gold
.Case("0x801", "cortex-a73") // Kryo 2xx Silver
.Case("0x802", "cortex-a75") // Kryo 3xx Gold
.Case("0x803", "cortex-a75") // Kryo 3xx Silver
.Case("0x804", "cortex-a76") // Kryo 4xx Gold
.Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
.Case("0xc00", "falkor")
.Case("0xc01", "saphira")
.Case("0x001", "oryon-1")
.Default("generic");
if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
// The Exynos chips have a convoluted ID scheme that doesn't seem to follow
// any predictable pattern across variants and parts.
unsigned Variant = 0, Part = 0;
// Look for the CPU variant line, whose value is a 1 digit hexadecimal
// number, corresponding to the Variant bits in the CP15/C0 register.
for (auto I : Lines)
if (I.consume_front("CPU variant"))
I.ltrim("\t :").getAsInteger(0, Variant);
// Look for the CPU part line, whose value is a 3 digit hexadecimal
// number, corresponding to the PartNum bits in the CP15/C0 register.
for (auto I : Lines)
if (I.consume_front("CPU part"))
I.ltrim("\t :").getAsInteger(0, Part);
unsigned Exynos = (Variant << 12) | Part;
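// For example (illustrative values): variant 0x1 with part 0x002 packs to
// (0x1 << 12) | 0x002 == 0x1002, which maps to "exynos-m3" below.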
switch (Exynos) {
default:
// Default by falling through to Exynos M3.
[[fallthrough]];
case 0x1002:
return "exynos-m3";
case 0x1003:
return "exynos-m4";
}
}
if (Implementer == "0x6d") { // Microsoft Corporation.
// The Microsoft Azure Cobalt 100 CPU is handled as a Neoverse N2.
return StringSwitch<const char *>(Part)
.Case("0xd49", "neoverse-n2")
.Default("generic");
}
if (Implementer == "0xc0") { // Ampere Computing
return StringSwitch<const char *>(Part)
.Case("0xac3", "ampere1")
.Case("0xac4", "ampere1a")
.Case("0xac5", "ampere1b")
.Default("generic");
}
return "generic";
}
namespace {
StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
switch (Id) {
case 2064: // z900 not supported by LLVM
case 2066:
case 2084: // z990 not supported by LLVM
case 2086:
case 2094: // z9-109 not supported by LLVM
case 2096:
return "generic";
case 2097:
case 2098:
return "z10";
case 2817:
case 2818:
return "z196";
case 2827:
case 2828:
return "zEC12";
case 2964:
case 2965:
return HaveVectorSupport ? "z13" : "zEC12";
case 3906:
case 3907:
return HaveVectorSupport ? "z14" : "zEC12";
case 8561:
case 8562:
return HaveVectorSupport ? "z15" : "zEC12";
case 3931:
case 3932:
default:
return HaveVectorSupport ? "z16" : "zEC12";
}
}
} // end anonymous namespace
StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
// STIDP is a privileged operation, so use /proc/cpuinfo instead.
// The "processor 0:" line comes after a fair amount of other information,
// including a cache breakdown, but this should be plenty.
SmallVector<StringRef, 32> Lines;
ProcCpuinfoContent.split(Lines, "\n");
// Look for the CPU features.
SmallVector<StringRef, 32> CPUFeatures;
for (unsigned I = 0, E = Lines.size(); I != E; ++I)
if (Lines[I].starts_with("features")) {
size_t Pos = Lines[I].find(':');
if (Pos != StringRef::npos) {
Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
break;
}
}
// We need to check for the presence of vector support independently of
// the machine type, since we may only use the vector register set when
// supported by the kernel (and hypervisor).
bool HaveVectorSupport = false;
for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
if (CPUFeatures[I] == "vx")
HaveVectorSupport = true;
}
// Now check the processor machine type.
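// An illustrative matching line (field values vary by machine):
//   processor 0: version = FF, identification = 123456, machine = 8561
// from which 8561 is parsed and mapped by getCPUNameFromS390Model above.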
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
if (Lines[I].starts_with("processor ")) {
size_t Pos = Lines[I].find("machine = ");
if (Pos != StringRef::npos) {
Pos += sizeof("machine = ") - 1;
unsigned int Id;
if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
return getCPUNameFromS390Model(Id, HaveVectorSupport);
}
break;
}
}
return "generic";
}
StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
// There are 24 lines in /proc/cpuinfo
SmallVector<StringRef> Lines;
ProcCpuinfoContent.split(Lines, "\n");
// Look for uarch line to determine cpu name
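// An illustrative cpuinfo line on SiFive hardware: "uarch : sifive,u74-mc".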
StringRef UArch;
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
if (Lines[I].starts_with("uarch")) {
UArch = Lines[I].substr(5).ltrim("\t :");
break;
}
}
return StringSwitch<const char *>(UArch)
.Case("sifive,u74-mc", "sifive-u74")
.Case("sifive,bullet0", "sifive-u74")
.Default("");
}
StringRef sys::detail::getHostCPUNameForBPF() {
#if !defined(__linux__) || !defined(__x86_64__)
return "generic";
#else
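/* Strategy (summarizing the probes below): try to load a trivial
   socket-filter program that uses a v3-only instruction (32-bit BPF_JLT,
   opcode 0xae); if the kernel verifier accepts it, report "v3". Otherwise
   retry with the 64-bit BPF_JLT form (opcode 0xad, a v2 instruction) to
   distinguish "v2" from "v1". */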
uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
/* BPF_MOV64_IMM(BPF_REG_0, 0) */
{ 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
/* BPF_MOV64_IMM(BPF_REG_2, 1) */
0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
/* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
/* BPF_MOV64_IMM(BPF_REG_0, 1) */
0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
/* BPF_EXIT_INSN() */
0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
/* BPF_MOV64_IMM(BPF_REG_0, 0) */
{ 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
/* BPF_MOV64_IMM(BPF_REG_2, 1) */
0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
/* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
/* BPF_MOV64_IMM(BPF_REG_0, 1) */
0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
/* BPF_EXIT_INSN() */
0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
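  /* The two programs differ only in the conditional jump: opcode 0xae encodes
     BPF_JMP32|BPF_JLT|BPF_X (32-bit compare, only in the v3 ISA) while 0xad
     encodes BPF_JMP|BPF_JLT|BPF_X (64-bit compare, available in v2). */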
struct bpf_prog_load_attr {
uint32_t prog_type;
uint32_t insn_cnt;
uint64_t insns;
uint64_t license;
uint32_t log_level;
uint32_t log_size;
uint64_t log_buf;
uint32_t kern_version;
uint32_t prog_flags;
} attr = {};
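  /* attr mirrors the prefix of the kernel's union bpf_attr that BPF_PROG_LOAD
     consumes, so no kernel uapi headers are needed. */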
attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
attr.insn_cnt = 5;
attr.insns = (uint64_t)v3_insns;
attr.license = (uint64_t)"DUMMY";
int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
sizeof(attr));
if (fd >= 0) {
close(fd);
return "v3";
}
  /* Clear the whole attr in case its contents were changed by the syscall. */
memset(&attr, 0, sizeof(attr));
attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
attr.insn_cnt = 5;
attr.insns = (uint64_t)v2_insns;
attr.license = (uint64_t)"DUMMY";
fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
if (fd >= 0) {
close(fd);
return "v2";
}
return "v1";
#endif
}
#if defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64__) || defined(_M_X64)
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Removal of cpuid.h header motivated by PR30384
// Header cpuid.h and the function __get_cpuid_max are not used in llvm, clang,
// openmp, or the test-suite, but are used in external projects, e.g. libstdcxx.
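// The i386 probe below toggles bit 21 (the ID flag, mask 0x00200000) in
// EFLAGS; that bit can only be flipped when the CPU implements CPUID.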
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
int __cpuid_supported;
__asm__(" pushfl\n"
" popl %%eax\n"
" movl %%eax,%%ecx\n"
" xorl $0x00200000,%%eax\n"
" pushl %%eax\n"
" popfl\n"
" pushfl\n"
" popl %%eax\n"
" movl $0,%0\n"
" cmpl %%eax,%%ecx\n"
" je 1f\n"
" movl $1,%0\n"
"1:"
: "=r"(__cpuid_supported)
:
: "eax", "ecx");
if (!__cpuid_supported)
return false;
#endif
return true;
#endif
return true;
}
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
// FIXME: should we save this for Clang?
__asm__("movq\t%%rbx, %%rsi\n\t"
"cpuid\n\t"
"xchgq\t%%rbx, %%rsi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value));
return false;
#elif defined(__i386__)
__asm__("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value));
return false;
#else
return true;
#endif
#elif defined(_MSC_VER)
// The MSVC intrinsic is portable across x86 and x64.
int registers[4];
__cpuid(registers, value);
*rEAX = registers[0];
*rEBX = registers[1];
*rECX = registers[2];
*rEDX = registers[3];
return false;
#else
return true;
#endif
}
namespace llvm {
namespace sys {
namespace detail {
namespace x86 {
VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
if (MaxLeaf == nullptr)
MaxLeaf = &EAX;
else
*MaxLeaf = 0;
if (!isCpuIdSupported())
return VendorSignatures::UNKNOWN;
if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
return VendorSignatures::UNKNOWN;
// "Genu ineI ntel"
if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
return VendorSignatures::GENUINE_INTEL;
// "Auth enti cAMD"
if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
return VendorSignatures::AUTHENTIC_AMD;
return VendorSignatures::UNKNOWN;
}
} // namespace x86
} // namespace detail
} // namespace sys
} // namespace llvm
using namespace llvm::sys::detail::x86;
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments. If we can't run cpuid on the host,
/// return true.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
// gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
// FIXME: should we save this for Clang?
__asm__("movq\t%%rbx, %%rsi\n\t"
"cpuid\n\t"
"xchgq\t%%rbx, %%rsi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
return false;
#elif defined(__i386__)
__asm__("movl\t%%ebx, %%esi\n\t"
"cpuid\n\t"
"xchgl\t%%ebx, %%esi\n\t"
: "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
: "a"(value), "c"(subleaf));
return false;
#else
return true;
#endif
#elif defined(_MSC_VER)
int registers[4];
__cpuidex(registers, value, subleaf);
*rEAX = registers[0];
*rEBX = registers[1];
*rECX = registers[2];
*rEDX = registers[3];
return false;
#else
return true;
#endif
}
// Read control register 0 (XCR0). Used to detect features such as AVX.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
// Check xgetbv; this uses a .byte sequence instead of the instruction
// directly because older assemblers do not include support for xgetbv and
// there is no easy way to conditionally compile based on the assembler used.
__asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
*rEAX = Result;
*rEDX = Result >> 32;
return false;
#else
return true;
#endif
}
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
unsigned *Model) {
*Family = (EAX >> 8) & 0xf; // Bits 8 - 11
*Model = (EAX >> 4) & 0xf; // Bits 4 - 7
if (*Family == 6 || *Family == 0xf) {
if (*Family == 0xf)
// Examine extended family ID if family ID is F.
*Family += (EAX >> 20) & 0xff; // Bits 20 - 27
// Examine extended model ID if family ID is 6 or F.
*Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
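    // e.g. EAX = 0x00050654: base family 6, base model 5, extended model 5
    // -> Family 6, Model 0x55 (the Skylake server case below).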
}
}
#define testFeature(F) ((Features[F / 32] & (1 << (F % 32))) != 0)
static StringRef getIntelProcessorTypeAndSubtype(unsigned Family,
unsigned Model,
const unsigned *Features,
unsigned *Type,
unsigned *Subtype) {
StringRef CPU;
switch (Family) {
case 3:
CPU = "i386";
break;
case 4:
CPU = "i486";
break;
case 5:
if (testFeature(X86::FEATURE_MMX)) {
CPU = "pentium-mmx";
break;
}
CPU = "pentium";
break;
case 6:
switch (Model) {
case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
// processor, Intel Core 2 Quad processor, Intel Core 2 Quad
// mobile processor, Intel Core 2 Extreme processor, Intel
// Pentium Dual-Core processor, Intel Xeon processor, model
// 0Fh. All processors are manufactured using the 65 nm process.
case 0x16: // Intel Celeron processor model 16h. All processors are
// manufactured using the 65 nm process
CPU = "core2";
*Type = X86::INTEL_CORE2;
break;
case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
// 17h. All processors are manufactured using the 45 nm process.
//
               // 45nm: Penryn, Wolfdale, Yorkfield (XE)
case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
// the 45 nm process.
CPU = "penryn";
*Type = X86::INTEL_CORE2;
break;
case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 45 nm process.
case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
// As found in a Summer 2010 model iMac.
case 0x1f:
case 0x2e: // Nehalem EX
CPU = "nehalem";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_NEHALEM;
break;
case 0x25: // Intel Core i7, laptop version.
case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
// processors are manufactured using the 32 nm process.
case 0x2f: // Westmere EX
CPU = "westmere";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_WESTMERE;
break;
case 0x2a: // Intel Core i7 processor. All processors are manufactured
// using the 32 nm process.
case 0x2d:
CPU = "sandybridge";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
break;
case 0x3a:
case 0x3e: // Ivy Bridge EP
CPU = "ivybridge";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_IVYBRIDGE;
break;
// Haswell:
case 0x3c:
case 0x3f:
case 0x45:
case 0x46:
CPU = "haswell";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_HASWELL;
break;
// Broadwell:
case 0x3d:
case 0x47:
case 0x4f:
case 0x56:
CPU = "broadwell";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_BROADWELL;
break;
// Skylake:
case 0x4e: // Skylake mobile
case 0x5e: // Skylake desktop
case 0x8e: // Kaby Lake mobile
case 0x9e: // Kaby Lake desktop
case 0xa5: // Comet Lake-H/S
case 0xa6: // Comet Lake-U
CPU = "skylake";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_SKYLAKE;
break;
// Rocketlake:
case 0xa7:
CPU = "rocketlake";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_ROCKETLAKE;
break;
// Skylake Xeon:
case 0x55:
*Type = X86::INTEL_COREI7;
if (testFeature(X86::FEATURE_AVX512BF16)) {
CPU = "cooperlake";
*Subtype = X86::INTEL_COREI7_COOPERLAKE;
} else if (testFeature(X86::FEATURE_AVX512VNNI)) {
CPU = "cascadelake";
*Subtype = X86::INTEL_COREI7_CASCADELAKE;
} else {
CPU = "skylake-avx512";
*Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
}
break;
// Cannonlake:
case 0x66:
CPU = "cannonlake";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_CANNONLAKE;
break;
// Icelake:
case 0x7d:
case 0x7e:
CPU = "icelake-client";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
break;
// Tigerlake:
case 0x8c:
case 0x8d:
CPU = "tigerlake";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_TIGERLAKE;
break;
// Alderlake:
case 0x97:
case 0x9a:
// Gracemont
case 0xbe:
// Raptorlake:
case 0xb7:
case 0xba:
case 0xbf:
// Meteorlake:
case 0xaa:
case 0xac:
CPU = "alderlake";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_ALDERLAKE;
break;
// Arrowlake:
case 0xc5:
CPU = "arrowlake";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_ARROWLAKE;
break;
// Arrowlake S:
case 0xc6:
// Lunarlake:
case 0xbd:
CPU = "arrowlake-s";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_ARROWLAKE_S;
break;
// Pantherlake:
case 0xcc:
CPU = "pantherlake";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_PANTHERLAKE;
break;
// Graniterapids:
case 0xad:
CPU = "graniterapids";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
break;
// Granite Rapids D:
case 0xae:
CPU = "graniterapids-d";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D;
break;
// Icelake Xeon:
case 0x6a:
case 0x6c:
CPU = "icelake-server";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
break;
// Emerald Rapids:
case 0xcf:
// Sapphire Rapids:
case 0x8f:
CPU = "sapphirerapids";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
break;
case 0x1c: // Most 45 nm Intel Atom processors
case 0x26: // 45 nm Atom Lincroft
case 0x27: // 32 nm Atom Medfield
case 0x35: // 32 nm Atom Midview
case 0x36: // 32 nm Atom Midview
CPU = "bonnell";
*Type = X86::INTEL_BONNELL;
break;
// Atom Silvermont codes from the Intel software optimization guide.
case 0x37:
case 0x4a:
case 0x4d:
case 0x5a:
case 0x5d:
case 0x4c: // really airmont
CPU = "silvermont";
*Type = X86::INTEL_SILVERMONT;
break;
// Goldmont:
case 0x5c: // Apollo Lake
case 0x5f: // Denverton
CPU = "goldmont";
*Type = X86::INTEL_GOLDMONT;
break;
case 0x7a:
CPU = "goldmont-plus";
*Type = X86::INTEL_GOLDMONT_PLUS;
break;
case 0x86:
case 0x8a: // Lakefield
case 0x96: // Elkhart Lake
case 0x9c: // Jasper Lake
CPU = "tremont";
*Type = X86::INTEL_TREMONT;
break;
// Sierraforest:
case 0xaf:
CPU = "sierraforest";
*Type = X86::INTEL_SIERRAFOREST;
break;
// Grandridge:
case 0xb6:
CPU = "grandridge";
*Type = X86::INTEL_GRANDRIDGE;
break;
// Clearwaterforest:
case 0xdd:
CPU = "clearwaterforest";
*Type = X86::INTEL_CLEARWATERFOREST;
break;
// Xeon Phi (Knights Landing + Knights Mill):
case 0x57:
CPU = "knl";
*Type = X86::INTEL_KNL;
break;
case 0x85:
CPU = "knm";
*Type = X86::INTEL_KNM;
break;
default: // Unknown family 6 CPU, try to guess.
    // Don't bother with Type/Subtype here; they aren't used by the caller.
// They're used above to keep the code in sync with compiler-rt.
// TODO detect tigerlake host from model
if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
CPU = "tigerlake";
} else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
CPU = "icelake-client";
} else if (testFeature(X86::FEATURE_AVX512VBMI)) {
CPU = "cannonlake";
} else if (testFeature(X86::FEATURE_AVX512BF16)) {
CPU = "cooperlake";
} else if (testFeature(X86::FEATURE_AVX512VNNI)) {
CPU = "cascadelake";
} else if (testFeature(X86::FEATURE_AVX512VL)) {
CPU = "skylake-avx512";
} else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
if (testFeature(X86::FEATURE_SHA))
CPU = "goldmont";
else
CPU = "skylake";
} else if (testFeature(X86::FEATURE_ADX)) {
CPU = "broadwell";
} else if (testFeature(X86::FEATURE_AVX2)) {
CPU = "haswell";
} else if (testFeature(X86::FEATURE_AVX)) {
CPU = "sandybridge";
} else if (testFeature(X86::FEATURE_SSE4_2)) {
if (testFeature(X86::FEATURE_MOVBE))
CPU = "silvermont";
else
CPU = "nehalem";
} else if (testFeature(X86::FEATURE_SSE4_1)) {
CPU = "penryn";
} else if (testFeature(X86::FEATURE_SSSE3)) {
if (testFeature(X86::FEATURE_MOVBE))
CPU = "bonnell";
else
CPU = "core2";
} else if (testFeature(X86::FEATURE_64BIT)) {
CPU = "core2";
} else if (testFeature(X86::FEATURE_SSE3)) {
CPU = "yonah";
} else if (testFeature(X86::FEATURE_SSE2)) {
CPU = "pentium-m";
} else if (testFeature(X86::FEATURE_SSE)) {
CPU = "pentium3";
} else if (testFeature(X86::FEATURE_MMX)) {
CPU = "pentium2";
} else {
CPU = "pentiumpro";
}
break;
}
break;
case 15: {
if (testFeature(X86::FEATURE_64BIT)) {
CPU = "nocona";
break;
}
if (testFeature(X86::FEATURE_SSE3)) {
CPU = "prescott";
break;
}
CPU = "pentium4";
break;
}
default:
break; // Unknown.
}
return CPU;
}
static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
unsigned Model,
const unsigned *Features,
unsigned *Type,
unsigned *Subtype) {
  const char *CPU = nullptr;
switch (Family) {
case 4:
CPU = "i486";
break;
case 5:
CPU = "pentium";
switch (Model) {
case 6:
case 7:
CPU = "k6";
break;
case 8:
CPU = "k6-2";
break;
case 9:
case 13:
CPU = "k6-3";
break;
case 10:
CPU = "geode";
break;
}
break;
case 6:
if (testFeature(X86::FEATURE_SSE)) {
CPU = "athlon-xp";
break;
}
CPU = "athlon";
break;
case 15:
if (testFeature(X86::FEATURE_SSE3)) {
CPU = "k8-sse3";
break;
}
CPU = "k8";
break;
case 16:
CPU = "amdfam10";
*Type = X86::AMDFAM10H; // "amdfam10"
switch (Model) {
case 2:
*Subtype = X86::AMDFAM10H_BARCELONA;
break;
case 4:
*Subtype = X86::AMDFAM10H_SHANGHAI;
break;
case 8:
*Subtype = X86::AMDFAM10H_ISTANBUL;
break;
}
break;
case 20:
CPU = "btver1";
*Type = X86::AMD_BTVER1;
break;
case 21:
CPU = "bdver1";
*Type = X86::AMDFAM15H;
if (Model >= 0x60 && Model <= 0x7f) {
CPU = "bdver4";
*Subtype = X86::AMDFAM15H_BDVER4;
break; // 60h-7Fh: Excavator
}
if (Model >= 0x30 && Model <= 0x3f) {
CPU = "bdver3";
*Subtype = X86::AMDFAM15H_BDVER3;
break; // 30h-3Fh: Steamroller
}
if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
CPU = "bdver2";
*Subtype = X86::AMDFAM15H_BDVER2;
break; // 02h, 10h-1Fh: Piledriver
}
if (Model <= 0x0f) {
*Subtype = X86::AMDFAM15H_BDVER1;
break; // 00h-0Fh: Bulldozer
}
break;
case 22:
CPU = "btver2";
*Type = X86::AMD_BTVER2;
break;
case 23:
CPU = "znver1";
*Type = X86::AMDFAM17H;
if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
(Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
(Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
(Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
(Model >= 0xa0 && Model <= 0xaf)) {
// Family 17h Models 30h-3Fh (Starship) Zen 2
// Family 17h Models 47h (Cardinal) Zen 2
// Family 17h Models 60h-67h (Renoir) Zen 2
// Family 17h Models 68h-6Fh (Lucienne) Zen 2
// Family 17h Models 70h-7Fh (Matisse) Zen 2
// Family 17h Models 84h-87h (ProjectX) Zen 2
// Family 17h Models 90h-97h (VanGogh) Zen 2
// Family 17h Models 98h-9Fh (Mero) Zen 2
// Family 17h Models A0h-AFh (Mendocino) Zen 2
CPU = "znver2";
*Subtype = X86::AMDFAM17H_ZNVER2;
break;
}
if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
// Family 17h Models 10h-1Fh (Raven1) Zen
// Family 17h Models 10h-1Fh (Picasso) Zen+
// Family 17h Models 20h-2Fh (Raven2 x86) Zen
*Subtype = X86::AMDFAM17H_ZNVER1;
break;
}
break;
case 25:
CPU = "znver3";
*Type = X86::AMDFAM19H;
if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
(Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
(Model >= 0x50 && Model <= 0x5f)) {
// Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
// Family 19h Models 20h-2Fh (Vermeer) Zen 3
// Family 19h Models 30h-3Fh (Badami) Zen 3
// Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
// Family 19h Models 50h-5Fh (Cezanne) Zen 3
*Subtype = X86::AMDFAM19H_ZNVER3;
break;
}
if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
(Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
(Model >= 0xa0 && Model <= 0xaf)) {
// Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
// Family 19h Models 60h-6Fh (Raphael) Zen 4
// Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
// Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
// Family 19h Models A0h-AFh (Stones-Dense) Zen 4
CPU = "znver4";
*Subtype = X86::AMDFAM19H_ZNVER4;
break; // "znver4"
}
break; // family 19h
+ case 26:
+ CPU = "znver5";
+ *Type = X86::AMDFAM1AH;
+ if (Model <= 0x77) {
+ // Models 00h-0Fh (Breithorn).
+ // Models 10h-1Fh (Breithorn-Dense).
+ // Models 20h-2Fh (Strix 1).
+ // Models 30h-37h (Strix 2).
+ // Models 38h-3Fh (Strix 3).
+ // Models 40h-4Fh (Granite Ridge).
+ // Models 50h-5Fh (Weisshorn).
+ // Models 60h-6Fh (Krackan1).
+ // Models 70h-77h (Sarlak).
+ CPU = "znver5";
+ *Subtype = X86::AMDFAM1AH_ZNVER5;
+ break; // "znver5"
+ }
+ break;
+
default:
break; // Unknown AMD CPU.
}
return CPU;
}
#undef testFeature
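// Translate the raw CPUID results (leaf 1 ECX/EDX passed in; leaves 7, 7.1
// and 0x80000001 queried here) into the X86::FEATURE_* bit vector consumed by
// the Intel/AMD type detection above.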
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
unsigned *Features) {
unsigned EAX, EBX;
auto setFeature = [&](unsigned F) {
Features[F / 32] |= 1U << (F % 32);
};
if ((EDX >> 15) & 1)
setFeature(X86::FEATURE_CMOV);
if ((EDX >> 23) & 1)
setFeature(X86::FEATURE_MMX);
if ((EDX >> 25) & 1)
setFeature(X86::FEATURE_SSE);
if ((EDX >> 26) & 1)
setFeature(X86::FEATURE_SSE2);
if ((ECX >> 0) & 1)
setFeature(X86::FEATURE_SSE3);
if ((ECX >> 1) & 1)
setFeature(X86::FEATURE_PCLMUL);
if ((ECX >> 9) & 1)
setFeature(X86::FEATURE_SSSE3);
if ((ECX >> 12) & 1)
setFeature(X86::FEATURE_FMA);
if ((ECX >> 19) & 1)
setFeature(X86::FEATURE_SSE4_1);
if ((ECX >> 20) & 1) {
setFeature(X86::FEATURE_SSE4_2);
setFeature(X86::FEATURE_CRC32);
}
if ((ECX >> 23) & 1)
setFeature(X86::FEATURE_POPCNT);
if ((ECX >> 25) & 1)
setFeature(X86::FEATURE_AES);
if ((ECX >> 22) & 1)
setFeature(X86::FEATURE_MOVBE);
  // If CPUID indicates support for XSAVE, XRSTOR and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
// switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
((EAX & 0x6) == 0x6);
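  // 0x6 covers XCR0 bits 1 (SSE/XMM state) and 2 (AVX/YMM state); both must
  // be enabled by the OS.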
#if defined(__APPLE__)
// Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
bool HasAVX512Save = true;
#else
// AVX512 requires additional context to be saved by the OS.
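  // 0xe0 covers XCR0 bits 5-7: opmask state, the upper halves of ZMM0-15,
  // and ZMM16-31.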
bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
#endif
if (HasAVX)
setFeature(X86::FEATURE_AVX);
bool HasLeaf7 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7 && ((EBX >> 3) & 1))
setFeature(X86::FEATURE_BMI);
if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
setFeature(X86::FEATURE_AVX2);
if (HasLeaf7 && ((EBX >> 8) & 1))
setFeature(X86::FEATURE_BMI2);
if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) {
setFeature(X86::FEATURE_AVX512F);
setFeature(X86::FEATURE_EVEX512);
}
if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512DQ);
if (HasLeaf7 && ((EBX >> 19) & 1))
setFeature(X86::FEATURE_ADX);
if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512IFMA);
if (HasLeaf7 && ((EBX >> 23) & 1))
setFeature(X86::FEATURE_CLFLUSHOPT);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512CD);
if (HasLeaf7 && ((EBX >> 29) & 1))
setFeature(X86::FEATURE_SHA);
if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512BW);
if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VL);
if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VBMI);
if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VBMI2);
if (HasLeaf7 && ((ECX >> 8) & 1))
setFeature(X86::FEATURE_GFNI);
if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
setFeature(X86::FEATURE_VPCLMULQDQ);
if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VNNI);
if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512BITALG);
if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX5124VNNIW);
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX5124FMAPS);
if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VP2INTERSECT);
// EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
// return all 0s for invalid subleaves so check the limit.
bool HasLeaf7Subleaf1 =
HasLeaf7 && EAX >= 1 &&
!getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512BF16);
unsigned MaxExtLevel;
getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
!getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
if (HasExtLeaf1 && ((ECX >> 6) & 1))
setFeature(X86::FEATURE_SSE4_A);
if (HasExtLeaf1 && ((ECX >> 11) & 1))
setFeature(X86::FEATURE_XOP);
if (HasExtLeaf1 && ((ECX >> 16) & 1))
setFeature(X86::FEATURE_FMA4);
if (HasExtLeaf1 && ((EDX >> 29) & 1))
setFeature(X86::FEATURE_64BIT);
}
StringRef sys::getHostCPUName() {
unsigned MaxLeaf = 0;
const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
if (Vendor == VendorSignatures::UNKNOWN)
return "generic";
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
unsigned Family = 0, Model = 0;
unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
detectX86FamilyModel(EAX, &Family, &Model);
getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
// These aren't consumed in this file, but we try to keep some source code the
// same or similar to compiler-rt.
unsigned Type = 0;
unsigned Subtype = 0;
StringRef CPU;
if (Vendor == VendorSignatures::GENUINE_INTEL) {
CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
&Subtype);
} else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
&Subtype);
}
if (!CPU.empty())
return CPU;
return "generic";
}
#elif defined(__APPLE__) && defined(__powerpc__)
StringRef sys::getHostCPUName() {
host_basic_info_data_t hostInfo;
mach_msg_type_number_t infoCount;
infoCount = HOST_BASIC_INFO_COUNT;
mach_port_t hostPort = mach_host_self();
host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
&infoCount);
mach_port_deallocate(mach_task_self(), hostPort);
if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
return "generic";
switch (hostInfo.cpu_subtype) {
case CPU_SUBTYPE_POWERPC_601:
return "601";
case CPU_SUBTYPE_POWERPC_602:
return "602";
case CPU_SUBTYPE_POWERPC_603:
return "603";
case CPU_SUBTYPE_POWERPC_603e:
return "603e";
case CPU_SUBTYPE_POWERPC_603ev:
return "603ev";
case CPU_SUBTYPE_POWERPC_604:
return "604";
case CPU_SUBTYPE_POWERPC_604e:
return "604e";
case CPU_SUBTYPE_POWERPC_620:
return "620";
case CPU_SUBTYPE_POWERPC_750:
return "750";
case CPU_SUBTYPE_POWERPC_7400:
return "7400";
case CPU_SUBTYPE_POWERPC_7450:
return "7450";
case CPU_SUBTYPE_POWERPC_970:
return "970";
default:;
}
return "generic";
}
#elif defined(__linux__) && defined(__powerpc__)
StringRef sys::getHostCPUName() {
std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
StringRef Content = P ? P->getBuffer() : "";
return detail::getHostCPUNameForPowerPC(Content);
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
StringRef sys::getHostCPUName() {
std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
StringRef Content = P ? P->getBuffer() : "";
return detail::getHostCPUNameForARM(Content);
}
#elif defined(__linux__) && defined(__s390x__)
StringRef sys::getHostCPUName() {
std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
StringRef Content = P ? P->getBuffer() : "";
return detail::getHostCPUNameForS390x(Content);
}
#elif defined(__MVS__)
StringRef sys::getHostCPUName() {
// Get pointer to Communications Vector Table (CVT).
// The pointer is located at offset 16 of the Prefixed Save Area (PSA).
  // It is stored as a 31-bit pointer and will be zero-extended to 64 bits.
int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
  // Since it's stored as a 31-bit pointer, read the 4 bytes at the start
  // of the address.
int ReadValue = *StartToCVTOffset;
// Explicitly clear the high order bit.
ReadValue = (ReadValue & 0x7FFFFFFF);
char *CVT = reinterpret_cast<char *>(ReadValue);
// The model number is located in the CVT prefix at offset -6 and stored as
// signless packed decimal.
uint16_t Id = *(uint16_t *)&CVT[-6];
// Convert number to integer.
Id = decodePackedBCD<uint16_t>(Id, false);
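  // e.g. the packed value 0x3931 decodes to 3931, which maps to z16 below.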
// Check for vector support. It's stored in field CVTFLAG5 (offset 244),
// bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
// extension can only be used if bit CVTVEF is on.
bool HaveVectorSupport = CVT[244] & 0x80;
return getCPUNameFromS390Model(Id, HaveVectorSupport);
}
#elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
#define CPUFAMILY_ARM_SWIFT 0x1e2d6381
#define CPUFAMILY_ARM_CYCLONE 0x37a09642
#define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
#define CPUFAMILY_ARM_TWISTER 0x92fb37c8
#define CPUFAMILY_ARM_HURRICANE 0x67ceee93
#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
#define CPUFAMILY_ARM_BLIZZARD_AVALANCHE 0xda33d83d
#define CPUFAMILY_ARM_EVEREST_SAWTOOTH 0x8765edea
StringRef sys::getHostCPUName() {
  uint32_t Family = 0;
size_t Length = sizeof(Family);
sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
switch (Family) {
case CPUFAMILY_ARM_SWIFT:
return "swift";
case CPUFAMILY_ARM_CYCLONE:
return "apple-a7";
case CPUFAMILY_ARM_TYPHOON:
return "apple-a8";
case CPUFAMILY_ARM_TWISTER:
return "apple-a9";
case CPUFAMILY_ARM_HURRICANE:
return "apple-a10";
case CPUFAMILY_ARM_MONSOON_MISTRAL:
return "apple-a11";
case CPUFAMILY_ARM_VORTEX_TEMPEST:
return "apple-a12";
case CPUFAMILY_ARM_LIGHTNING_THUNDER:
return "apple-a13";
case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
return "apple-m1";
case CPUFAMILY_ARM_BLIZZARD_AVALANCHE:
return "apple-m2";
case CPUFAMILY_ARM_EVEREST_SAWTOOTH:
return "apple-m3";
default:
// Default to the newest CPU we know about.
return "apple-m3";
}
}
#elif defined(_AIX)
StringRef sys::getHostCPUName() {
switch (_system_configuration.implementation) {
case POWER_4:
if (_system_configuration.version == PV_4_3)
return "970";
return "pwr4";
case POWER_5:
if (_system_configuration.version == PV_5)
return "pwr5";
return "pwr5x";
case POWER_6:
if (_system_configuration.version == PV_6_Compat)
return "pwr6";
return "pwr6x";
case POWER_7:
return "pwr7";
case POWER_8:
return "pwr8";
case POWER_9:
return "pwr9";
// TODO: simplify this once the macro is available in all OS levels.
#ifdef POWER_10
case POWER_10:
#else
case 0x40000:
#endif
return "pwr10";
#ifdef POWER_11
case POWER_11:
#else
case 0x80000:
#endif
return "pwr11";
default:
return "generic";
}
}
#elif defined(__loongarch__)
StringRef sys::getHostCPUName() {
// Use processor id to detect cpu name.
uint32_t processor_id;
__asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
  // Refer to PRID_SERIES_MASK in the Linux kernel: arch/loongarch/include/asm/cpu.h.
switch (processor_id & 0xf000) {
case 0xc000: // Loongson 64bit, 4-issue
return "la464";
case 0xd000: // Loongson 64bit, 6-issue
return "la664";
// TODO: Others.
default:
break;
}
return "generic";
}
#elif defined(__riscv)
StringRef sys::getHostCPUName() {
#if defined(__linux__)
std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
StringRef Content = P ? P->getBuffer() : "";
StringRef Name = detail::getHostCPUNameForRISCV(Content);
if (!Name.empty())
return Name;
#endif
#if __riscv_xlen == 64
return "generic-rv64";
#elif __riscv_xlen == 32
return "generic-rv32";
#else
#error "Unhandled value of __riscv_xlen"
#endif
}
#elif defined(__sparc__)
#if defined(__linux__)
StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
SmallVector<StringRef> Lines;
ProcCpuinfoContent.split(Lines, "\n");
// Look for cpu line to determine cpu name
StringRef Cpu;
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
if (Lines[I].starts_with("cpu")) {
Cpu = Lines[I].substr(5).ltrim("\t :");
break;
}
}
return StringSwitch<const char *>(Cpu)
.StartsWith("SuperSparc", "supersparc")
.StartsWith("HyperSparc", "hypersparc")
.StartsWith("SpitFire", "ultrasparc")
.StartsWith("BlackBird", "ultrasparc")
      .StartsWith("Sabre", "ultrasparc")
.StartsWith("Hummingbird", "ultrasparc")
.StartsWith("Cheetah", "ultrasparc3")
.StartsWith("Jalapeno", "ultrasparc3")
.StartsWith("Jaguar", "ultrasparc3")
.StartsWith("Panther", "ultrasparc3")
.StartsWith("Serrano", "ultrasparc3")
.StartsWith("UltraSparc T1", "niagara")
.StartsWith("UltraSparc T2", "niagara2")
.StartsWith("UltraSparc T3", "niagara3")
.StartsWith("UltraSparc T4", "niagara4")
.StartsWith("UltraSparc T5", "niagara4")
.StartsWith("LEON", "leon3")
// niagara7/m8 not supported by LLVM yet.
.StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
.StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
.StartsWith("SPARC-M8", "niagara4" /* "m8" */)
.Default("generic");
}
#endif
StringRef sys::getHostCPUName() {
#if defined(__linux__)
std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
StringRef Content = P ? P->getBuffer() : "";
return detail::getHostCPUNameForSPARC(Content);
#elif defined(__sun__) && defined(__svr4__)
char *buf = NULL;
kstat_ctl_t *kc;
kstat_t *ksp;
kstat_named_t *brand = NULL;
kc = kstat_open();
if (kc != NULL) {
ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
ksp->ks_type == KSTAT_TYPE_NAMED)
brand =
(kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
buf = KSTAT_NAMED_STR_PTR(brand);
}
  if (kc != NULL)
    kstat_close(kc);
return StringSwitch<const char *>(buf)
.Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
.Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
.Case("TMS390Z55",
"supersparc") // Texas Instruments SuperSPARC I with SuperCache
.Case("MB86904", "supersparc") // Fujitsu microSPARC II
.Case("MB86907", "supersparc") // Fujitsu TurboSPARC
.Case("RT623", "hypersparc") // Ross hyperSPARC
.Case("RT625", "hypersparc")
.Case("RT626", "hypersparc")
.Case("UltraSPARC-I", "ultrasparc")
.Case("UltraSPARC-II", "ultrasparc")
.Case("UltraSPARC-IIe", "ultrasparc")
.Case("UltraSPARC-IIi", "ultrasparc")
.Case("SPARC64-III", "ultrasparc")
.Case("SPARC64-IV", "ultrasparc")
.Case("UltraSPARC-III", "ultrasparc3")
.Case("UltraSPARC-III+", "ultrasparc3")
.Case("UltraSPARC-IIIi", "ultrasparc3")
.Case("UltraSPARC-IIIi+", "ultrasparc3")
.Case("UltraSPARC-IV", "ultrasparc3")
.Case("UltraSPARC-IV+", "ultrasparc3")
.Case("SPARC64-V", "ultrasparc3")
.Case("SPARC64-VI", "ultrasparc3")
.Case("SPARC64-VII", "ultrasparc3")
.Case("UltraSPARC-T1", "niagara")
.Case("UltraSPARC-T2", "niagara2")
.Case("UltraSPARC-T2+", "niagara2")
.Case("SPARC-T3", "niagara3")
.Case("SPARC-T4", "niagara4")
.Case("SPARC-T5", "niagara4")
// niagara7/m8 not supported by LLVM yet.
.Case("SPARC-M7", "niagara4" /* "niagara7" */)
.Case("SPARC-S7", "niagara4" /* "niagara7" */)
.Case("SPARC-M8", "niagara4" /* "m8" */)
.Default("generic");
#else
return "generic";
#endif
}
#else
StringRef sys::getHostCPUName() { return "generic"; }
namespace llvm {
namespace sys {
namespace detail {
namespace x86 {
VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
return VendorSignatures::UNKNOWN;
}
} // namespace x86
} // namespace detail
} // namespace sys
} // namespace llvm
#endif
#if defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64__) || defined(_M_X64)
const StringMap<bool> sys::getHostCPUFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
unsigned MaxLevel;
StringMap<bool> Features;
if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
return Features;
getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
Features["cx8"] = (EDX >> 8) & 1;
Features["cmov"] = (EDX >> 15) & 1;
Features["mmx"] = (EDX >> 23) & 1;
Features["fxsr"] = (EDX >> 24) & 1;
Features["sse"] = (EDX >> 25) & 1;
Features["sse2"] = (EDX >> 26) & 1;
Features["sse3"] = (ECX >> 0) & 1;
Features["pclmul"] = (ECX >> 1) & 1;
Features["ssse3"] = (ECX >> 9) & 1;
Features["cx16"] = (ECX >> 13) & 1;
Features["sse4.1"] = (ECX >> 19) & 1;
Features["sse4.2"] = (ECX >> 20) & 1;
Features["crc32"] = Features["sse4.2"];
Features["movbe"] = (ECX >> 22) & 1;
Features["popcnt"] = (ECX >> 23) & 1;
Features["aes"] = (ECX >> 25) & 1;
Features["rdrnd"] = (ECX >> 30) & 1;
  // If CPUID indicates support for XSAVE, XRSTOR and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
// switch, then we have full AVX support.
bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
// Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
bool HasAVX512Save = true;
#else
// AVX512 requires additional context to be saved by the OS.
bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
#endif
// AMX requires additional context to be saved by the OS.
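  // XCR0 bits 17 (XTILECFG) and 18 (XTILEDATA) must both be enabled.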
const unsigned AMXBits = (1 << 17) | (1 << 18);
bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
Features["avx"] = HasAVXSave;
Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave;
  // Only enable XSAVE if the OS has enabled support for saving YMM state.
Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave;
unsigned MaxExtLevel;
getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
!getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1);
Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1);
Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1);
  // Miscellaneous memory-related features, detected via
  // the 0x80000008 leaf of the CPUID instruction.
bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
!getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
Features["rdpru"] = HasExtLeaf8 && ((EBX >> 4) & 1);
Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
bool HasLeaf7 =
MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
// AVX2 is only supported if we have the OS save support from AVX.
Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave;
Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
// AVX512 is only supported if the OS supports the context save for it.
Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
if (Features["avx512f"])
Features["evex512"] = true;
Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1);
Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1);
Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave;
Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1);
Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker
Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1);
Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1);
Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1);
Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1);
Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1);
Features["avx512vp2intersect"] =
HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1);
Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves that carry information associated with the
  // pconfig instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the
  // 18th bit of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
// Users might need to check for the availability of specific pconfig
// leaves using cpuid, since that information is ignored while
// detecting features using the "-march=native" flag.
// For more info, see X86 ISA docs.
Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
// EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
// return all 0s for invalid subleaves so check the limit.
bool HasLeaf7Subleaf1 =
HasLeaf7 && EAX >= 1 &&
!getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
Features["sha512"] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
Features["sm3"] = HasLeaf7Subleaf1 && ((EAX >> 1) & 1);
Features["sm4"] = HasLeaf7Subleaf1 && ((EAX >> 2) & 1);
Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
Features["amx-fp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
Features["usermsr"] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1);
Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);
bool HasAPXF = HasLeaf7Subleaf1 && ((EDX >> 21) & 1);
Features["egpr"] = HasAPXF;
Features["push2pop2"] = HasAPXF;
Features["ppx"] = HasAPXF;
Features["ndd"] = HasAPXF;
Features["ccmp"] = HasAPXF;
Features["cf"] = HasAPXF;
bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
  // Only enable XSAVE if the OS has enabled support for saving YMM state.
Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
bool HasLeaf14 = MaxLevel >= 0x14 &&
!getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
bool HasLeaf19 =
MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
bool HasLeaf24 =
MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
Features["avx10.1-512"] =
Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1);
return Features;
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
const StringMap<bool> sys::getHostCPUFeatures() {
StringMap<bool> Features;
std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
if (!P)
return Features;
SmallVector<StringRef, 32> Lines;
P->getBuffer().split(Lines, "\n");
SmallVector<StringRef, 32> CPUFeatures;
// Look for the CPU features.
for (unsigned I = 0, E = Lines.size(); I != E; ++I)
if (Lines[I].starts_with("Features")) {
Lines[I].split(CPUFeatures, ' ');
break;
}
#if defined(__aarch64__)
// Keep track of which crypto features we have seen
enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
uint32_t crypto = 0;
#endif
for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
#if defined(__aarch64__)
.Case("asimd", "neon")
.Case("fp", "fp-armv8")
.Case("crc32", "crc")
.Case("atomics", "lse")
.Case("sve", "sve")
.Case("sve2", "sve2")
#else
.Case("half", "fp16")
.Case("neon", "neon")
.Case("vfpv3", "vfp3")
.Case("vfpv3d16", "vfp3d16")
.Case("vfpv4", "vfp4")
.Case("idiva", "hwdiv-arm")
.Case("idivt", "hwdiv")
#endif
.Default("");
#if defined(__aarch64__)
// We need to check crypto separately since we need all of the crypto
// extensions to enable the subtarget feature
if (CPUFeatures[I] == "aes")
crypto |= CAP_AES;
else if (CPUFeatures[I] == "pmull")
crypto |= CAP_PMULL;
else if (CPUFeatures[I] == "sha1")
crypto |= CAP_SHA1;
else if (CPUFeatures[I] == "sha2")
crypto |= CAP_SHA2;
#endif
if (LLVMFeatureStr != "")
Features[LLVMFeatureStr] = true;
}
#if defined(__aarch64__)
// If we have all crypto bits we can add the feature
if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
Features["crypto"] = true;
#endif
return Features;
}
#elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
const StringMap<bool> sys::getHostCPUFeatures() {
StringMap<bool> Features;
if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
Features["neon"] = true;
if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
Features["crc"] = true;
if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
Features["crypto"] = true;
return Features;
}
#elif defined(__linux__) && defined(__loongarch__)
#include <sys/auxv.h>
const StringMap<bool> sys::getHostCPUFeatures() {
unsigned long hwcap = getauxval(AT_HWCAP);
bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU
uint32_t cpucfg2 = 0x2;
__asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2));
StringMap<bool> Features;
Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP
Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP
Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX
Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX
Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ
return Features;
}
#elif defined(__linux__) && defined(__riscv)
// struct riscv_hwprobe
struct RISCVHwProbe {
int64_t Key;
uint64_t Value;
};
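// For each requested Key the riscv_hwprobe syscall writes back the probed
// Value; with an empty CPU set (count 0, null mask) the kernel should report
// behavior common to all online harts.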
const StringMap<bool> sys::getHostCPUFeatures() {
RISCVHwProbe Query[]{{/*RISCV_HWPROBE_KEY_BASE_BEHAVIOR=*/3, 0},
{/*RISCV_HWPROBE_KEY_IMA_EXT_0=*/4, 0}};
int Ret = syscall(/*__NR_riscv_hwprobe=*/258, /*pairs=*/Query,
/*pair_count=*/std::size(Query), /*cpu_count=*/0,
/*cpus=*/0, /*flags=*/0);
if (Ret != 0)
return {};
StringMap<bool> Features;
uint64_t BaseMask = Query[0].Value;
// Check whether RISCV_HWPROBE_BASE_BEHAVIOR_IMA is set.
if (BaseMask & 1) {
Features["i"] = true;
Features["m"] = true;
Features["a"] = true;
}
uint64_t ExtMask = Query[1].Value;
Features["f"] = ExtMask & (1 << 0); // RISCV_HWPROBE_IMA_FD
Features["d"] = ExtMask & (1 << 0); // RISCV_HWPROBE_IMA_FD
Features["c"] = ExtMask & (1 << 1); // RISCV_HWPROBE_IMA_C
Features["v"] = ExtMask & (1 << 2); // RISCV_HWPROBE_IMA_V
Features["zba"] = ExtMask & (1 << 3); // RISCV_HWPROBE_EXT_ZBA
Features["zbb"] = ExtMask & (1 << 4); // RISCV_HWPROBE_EXT_ZBB
Features["zbs"] = ExtMask & (1 << 5); // RISCV_HWPROBE_EXT_ZBS
Features["zicboz"] = ExtMask & (1 << 6); // RISCV_HWPROBE_EXT_ZICBOZ
Features["zbc"] = ExtMask & (1 << 7); // RISCV_HWPROBE_EXT_ZBC
Features["zbkb"] = ExtMask & (1 << 8); // RISCV_HWPROBE_EXT_ZBKB
Features["zbkc"] = ExtMask & (1 << 9); // RISCV_HWPROBE_EXT_ZBKC
Features["zbkx"] = ExtMask & (1 << 10); // RISCV_HWPROBE_EXT_ZBKX
Features["zknd"] = ExtMask & (1 << 11); // RISCV_HWPROBE_EXT_ZKND
Features["zkne"] = ExtMask & (1 << 12); // RISCV_HWPROBE_EXT_ZKNE
Features["zknh"] = ExtMask & (1 << 13); // RISCV_HWPROBE_EXT_ZKNH
Features["zksed"] = ExtMask & (1 << 14); // RISCV_HWPROBE_EXT_ZKSED
Features["zksh"] = ExtMask & (1 << 15); // RISCV_HWPROBE_EXT_ZKSH
Features["zkt"] = ExtMask & (1 << 16); // RISCV_HWPROBE_EXT_ZKT
Features["zvbb"] = ExtMask & (1 << 17); // RISCV_HWPROBE_EXT_ZVBB
Features["zvbc"] = ExtMask & (1 << 18); // RISCV_HWPROBE_EXT_ZVBC
Features["zvkb"] = ExtMask & (1 << 19); // RISCV_HWPROBE_EXT_ZVKB
Features["zvkg"] = ExtMask & (1 << 20); // RISCV_HWPROBE_EXT_ZVKG
Features["zvkned"] = ExtMask & (1 << 21); // RISCV_HWPROBE_EXT_ZVKNED
Features["zvknha"] = ExtMask & (1 << 22); // RISCV_HWPROBE_EXT_ZVKNHA
Features["zvknhb"] = ExtMask & (1 << 23); // RISCV_HWPROBE_EXT_ZVKNHB
Features["zvksed"] = ExtMask & (1 << 24); // RISCV_HWPROBE_EXT_ZVKSED
Features["zvksh"] = ExtMask & (1 << 25); // RISCV_HWPROBE_EXT_ZVKSH
Features["zvkt"] = ExtMask & (1 << 26); // RISCV_HWPROBE_EXT_ZVKT
Features["zfh"] = ExtMask & (1 << 27); // RISCV_HWPROBE_EXT_ZFH
Features["zfhmin"] = ExtMask & (1 << 28); // RISCV_HWPROBE_EXT_ZFHMIN
Features["zihintntl"] = ExtMask & (1 << 29); // RISCV_HWPROBE_EXT_ZIHINTNTL
Features["zvfh"] = ExtMask & (1 << 30); // RISCV_HWPROBE_EXT_ZVFH
Features["zvfhmin"] = ExtMask & (1ULL << 31); // RISCV_HWPROBE_EXT_ZVFHMIN
Features["zfa"] = ExtMask & (1ULL << 32); // RISCV_HWPROBE_EXT_ZFA
Features["ztso"] = ExtMask & (1ULL << 33); // RISCV_HWPROBE_EXT_ZTSO
// TODO: Re-enable zacas when it is marked non-experimental again.
// Features["zacas"] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS
Features["zicond"] = ExtMask & (1ULL << 35); // RISCV_HWPROBE_EXT_ZICOND
Features["zihintpause"] =
ExtMask & (1ULL << 36); // RISCV_HWPROBE_EXT_ZIHINTPAUSE
// TODO: set unaligned-scalar-mem if RISCV_HWPROBE_KEY_MISALIGNED_PERF returns
// RISCV_HWPROBE_MISALIGNED_FAST.
return Features;
}
#else
const StringMap<bool> sys::getHostCPUFeatures() { return {}; }
#endif
#if __APPLE__
/// \returns the \p triple, but with the host's arch spliced in.
static Triple withHostArch(Triple T) {
#if defined(__arm__)
T.setArch(Triple::arm);
T.setArchName("arm");
#elif defined(__arm64e__)
T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e);
T.setArchName("arm64e");
#elif defined(__aarch64__)
T.setArch(Triple::aarch64);
T.setArchName("arm64");
#elif defined(__x86_64h__)
T.setArch(Triple::x86_64);
T.setArchName("x86_64h");
#elif defined(__x86_64__)
T.setArch(Triple::x86_64);
T.setArchName("x86_64");
#elif defined(__i386__)
T.setArch(Triple::x86);
T.setArchName("i386");
#elif defined(__powerpc__)
T.setArch(Triple::ppc);
T.setArchName("powerpc");
#else
# error "Unimplemented host arch fixup"
#endif
return T;
}
#endif
std::string sys::getProcessTriple() {
std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
Triple PT(Triple::normalize(TargetTripleString));
#if __APPLE__
/// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of
/// the slices. This fixes that up.
PT = withHostArch(PT);
#endif
if (sizeof(void *) == 8 && PT.isArch32Bit())
PT = PT.get64BitArchVariant();
if (sizeof(void *) == 4 && PT.isArch64Bit())
PT = PT.get32BitArchVariant();
return PT.str();
}
void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) {
#if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
std::string CPU = std::string(sys::getHostCPUName());
if (CPU == "generic")
CPU = "(unknown)";
OS << " Default target: " << sys::getDefaultTargetTriple() << '\n'
<< " Host CPU: " << CPU << '\n';
#endif
}
diff --git a/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp
index dcf9130052ac..a6f3b5ba5d33 100644
--- a/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -1,773 +1,778 @@
//===-- X86TargetParser - Parser for X86 features ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a target parser to recognise X86 hardware features.
//
//===----------------------------------------------------------------------===//
#include "llvm/TargetParser/X86TargetParser.h"
#include "llvm/ADT/Bitset.h"
#include "llvm/ADT/StringSwitch.h"
#include <numeric>
using namespace llvm;
using namespace llvm::X86;
namespace {
using FeatureBitset = Bitset<X86::CPU_FEATURE_MAX>;
struct ProcInfo {
StringLiteral Name;
X86::CPUKind Kind;
unsigned KeyFeature;
FeatureBitset Features;
char Mangling;
bool OnlyForCPUDispatchSpecific;
};
struct FeatureInfo {
StringLiteral NameWithPlus;
FeatureBitset ImpliedFeatures;
StringRef getName(bool WithPlus = false) const {
assert(NameWithPlus[0] == '+' && "Expected string to start with '+'");
if (WithPlus)
return NameWithPlus;
return NameWithPlus.drop_front();
}
};
} // end anonymous namespace
#define X86_FEATURE(ENUM, STRING) \
constexpr FeatureBitset Feature##ENUM = {X86::FEATURE_##ENUM};
#include "llvm/TargetParser/X86TargetParser.def"
// Pentium with MMX.
constexpr FeatureBitset FeaturesPentiumMMX =
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX;
// Pentium 2 and 3.
constexpr FeatureBitset FeaturesPentium2 =
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeatureFXSR | FeatureCMOV;
constexpr FeatureBitset FeaturesPentium3 = FeaturesPentium2 | FeatureSSE;
// Pentium 4 CPUs
constexpr FeatureBitset FeaturesPentium4 = FeaturesPentium3 | FeatureSSE2;
constexpr FeatureBitset FeaturesPrescott = FeaturesPentium4 | FeatureSSE3;
constexpr FeatureBitset FeaturesNocona =
FeaturesPrescott | Feature64BIT | FeatureCMPXCHG16B;
// Basic 64-bit capable CPU.
constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
FeaturePOPCNT | FeatureCRC32 |
FeatureSSE4_2 | FeatureCMPXCHG16B;
constexpr FeatureBitset FeaturesX86_64_V3 =
FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 | FeatureEVEX512 |
FeatureAVX512BW | FeatureAVX512CD |
FeatureAVX512DQ | FeatureAVX512VL;
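// These correspond to the x86-64 psABI micro-architecture levels x86-64-v2,
// -v3 and -v4.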
// Intel Core CPUs
constexpr FeatureBitset FeaturesCore2 =
FeaturesNocona | FeatureSAHF | FeatureSSSE3;
constexpr FeatureBitset FeaturesPenryn = FeaturesCore2 | FeatureSSE4_1;
constexpr FeatureBitset FeaturesNehalem =
FeaturesPenryn | FeaturePOPCNT | FeatureCRC32 | FeatureSSE4_2;
constexpr FeatureBitset FeaturesWestmere = FeaturesNehalem | FeaturePCLMUL;
constexpr FeatureBitset FeaturesSandyBridge =
FeaturesWestmere | FeatureAVX | FeatureXSAVE | FeatureXSAVEOPT;
constexpr FeatureBitset FeaturesIvyBridge =
FeaturesSandyBridge | FeatureF16C | FeatureFSGSBASE | FeatureRDRND;
constexpr FeatureBitset FeaturesHaswell =
FeaturesIvyBridge | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureFMA |
FeatureINVPCID | FeatureLZCNT | FeatureMOVBE;
constexpr FeatureBitset FeaturesBroadwell =
FeaturesHaswell | FeatureADX | FeaturePRFCHW | FeatureRDSEED;
// Intel Knights Landing and Knights Mill
// Knights Landing has feature parity with Broadwell.
constexpr FeatureBitset FeaturesKNL = FeaturesBroadwell | FeatureAES |
FeatureAVX512F | FeatureEVEX512 |
FeatureAVX512CD;
constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ;
// Intel Skylake processors.
constexpr FeatureBitset FeaturesSkylakeClient =
FeaturesBroadwell | FeatureAES | FeatureCLFLUSHOPT | FeatureXSAVEC |
FeatureXSAVES | FeatureSGX;
// SkylakeServer inherits all SkylakeClient features except SGX.
// FIXME: That doesn't match gcc.
constexpr FeatureBitset FeaturesSkylakeServer =
(FeaturesSkylakeClient & ~FeatureSGX) | FeatureAVX512F | FeatureEVEX512 |
FeatureAVX512CD | FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL |
FeatureCLWB | FeaturePKU;
constexpr FeatureBitset FeaturesCascadeLake =
FeaturesSkylakeServer | FeatureAVX512VNNI;
constexpr FeatureBitset FeaturesCooperLake =
FeaturesCascadeLake | FeatureAVX512BF16;
// Intel 10nm processors.
constexpr FeatureBitset FeaturesCannonlake =
FeaturesSkylakeClient | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD |
FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA |
FeatureAVX512VBMI | FeaturePKU | FeatureSHA;
constexpr FeatureBitset FeaturesICLClient =
FeaturesCannonlake | FeatureAVX512BITALG | FeatureAVX512VBMI2 |
FeatureAVX512VNNI | FeatureAVX512VPOPCNTDQ | FeatureGFNI | FeatureRDPID |
FeatureVAES | FeatureVPCLMULQDQ;
constexpr FeatureBitset FeaturesRocketlake = FeaturesICLClient & ~FeatureSGX;
constexpr FeatureBitset FeaturesICLServer =
FeaturesICLClient | FeatureCLWB | FeaturePCONFIG | FeatureWBNOINVD;
constexpr FeatureBitset FeaturesTigerlake =
FeaturesICLClient | FeatureAVX512VP2INTERSECT | FeatureMOVDIR64B |
FeatureCLWB | FeatureMOVDIRI | FeatureSHSTK | FeatureKL | FeatureWIDEKL;
constexpr FeatureBitset FeaturesSapphireRapids =
FeaturesICLServer | FeatureAMX_BF16 | FeatureAMX_INT8 | FeatureAMX_TILE |
FeatureAVX512BF16 | FeatureAVX512FP16 | FeatureAVXVNNI | FeatureCLDEMOTE |
FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR |
FeatureWAITPKG;
constexpr FeatureBitset FeaturesGraniteRapids =
FeaturesSapphireRapids | FeatureAMX_FP16 | FeaturePREFETCHI;
// Intel Atom processors.
// Bonnell has feature parity with Core2 and adds MOVBE.
constexpr FeatureBitset FeaturesBonnell = FeaturesCore2 | FeatureMOVBE;
// Silvermont has parity with Westmere and Bonnell plus PRFCHW and RDRND.
constexpr FeatureBitset FeaturesSilvermont =
FeaturesBonnell | FeaturesWestmere | FeaturePRFCHW | FeatureRDRND;
constexpr FeatureBitset FeaturesGoldmont =
FeaturesSilvermont | FeatureAES | FeatureCLFLUSHOPT | FeatureFSGSBASE |
FeatureRDSEED | FeatureSHA | FeatureXSAVE | FeatureXSAVEC |
FeatureXSAVEOPT | FeatureXSAVES;
constexpr FeatureBitset FeaturesGoldmontPlus =
FeaturesGoldmont | FeaturePTWRITE | FeatureRDPID | FeatureSGX;
constexpr FeatureBitset FeaturesTremont =
FeaturesGoldmontPlus | FeatureCLWB | FeatureGFNI;
constexpr FeatureBitset FeaturesAlderlake =
FeaturesTremont | FeatureADX | FeatureBMI | FeatureBMI2 | FeatureF16C |
FeatureFMA | FeatureINVPCID | FeatureLZCNT | FeaturePCONFIG | FeaturePKU |
FeatureSERIALIZE | FeatureSHSTK | FeatureVAES | FeatureVPCLMULQDQ |
FeatureCLDEMOTE | FeatureMOVDIR64B | FeatureMOVDIRI | FeatureWAITPKG |
FeatureAVXVNNI | FeatureHRESET | FeatureWIDEKL;
constexpr FeatureBitset FeaturesSierraforest =
FeaturesAlderlake | FeatureCMPCCXADD | FeatureAVXIFMA | FeatureUINTR |
FeatureENQCMD | FeatureAVXNECONVERT | FeatureAVXVNNIINT8;
constexpr FeatureBitset FeaturesArrowlakeS = FeaturesSierraforest |
FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4;
constexpr FeatureBitset FeaturesPantherlake =
FeaturesArrowlakeS | FeaturePREFETCHI;
constexpr FeatureBitset FeaturesClearwaterforest =
FeaturesArrowlakeS | FeatureUSERMSR | FeaturePREFETCHI;
// Geode Processor.
constexpr FeatureBitset FeaturesGeode =
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeaturePRFCHW;
// K6 processor.
constexpr FeatureBitset FeaturesK6 = FeatureX87 | FeatureCMPXCHG8B | FeatureMMX;
// K7 and K8 architecture processors.
constexpr FeatureBitset FeaturesAthlon =
FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeaturePRFCHW;
constexpr FeatureBitset FeaturesAthlonXP =
FeaturesAthlon | FeatureFXSR | FeatureSSE;
constexpr FeatureBitset FeaturesK8 =
FeaturesAthlonXP | FeatureSSE2 | Feature64BIT;
constexpr FeatureBitset FeaturesK8SSE3 = FeaturesK8 | FeatureSSE3;
constexpr FeatureBitset FeaturesAMDFAM10 =
FeaturesK8SSE3 | FeatureCMPXCHG16B | FeatureLZCNT | FeaturePOPCNT |
FeaturePRFCHW | FeatureSAHF | FeatureSSE4_A;
// Bobcat architecture processors.
constexpr FeatureBitset FeaturesBTVER1 =
FeatureX87 | FeatureCMPXCHG8B | FeatureCMPXCHG16B | Feature64BIT |
FeatureFXSR | FeatureLZCNT | FeatureMMX | FeaturePOPCNT | FeaturePRFCHW |
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_A |
FeatureSAHF;
constexpr FeatureBitset FeaturesBTVER2 =
FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureCRC32 |
FeatureF16C | FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
// AMD Bulldozer architecture processors.
constexpr FeatureBitset FeaturesBDVER1 =
FeatureX87 | FeatureAES | FeatureAVX | FeatureCMPXCHG8B |
FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT | FeatureFMA4 |
FeatureFXSR | FeatureLWP | FeatureLZCNT | FeatureMMX | FeaturePCLMUL |
FeaturePOPCNT | FeaturePRFCHW | FeatureSAHF | FeatureSSE | FeatureSSE2 |
FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A |
FeatureXOP | FeatureXSAVE;
constexpr FeatureBitset FeaturesBDVER2 =
FeaturesBDVER1 | FeatureBMI | FeatureFMA | FeatureF16C | FeatureTBM;
constexpr FeatureBitset FeaturesBDVER3 =
FeaturesBDVER2 | FeatureFSGSBASE | FeatureXSAVEOPT;
constexpr FeatureBitset FeaturesBDVER4 = FeaturesBDVER3 | FeatureAVX2 |
FeatureBMI2 | FeatureMOVBE |
FeatureMWAITX | FeatureRDRND;
// AMD Zen architecture processors.
constexpr FeatureBitset FeaturesZNVER1 =
FeatureX87 | FeatureADX | FeatureAES | FeatureAVX | FeatureAVX2 |
FeatureBMI | FeatureBMI2 | FeatureCLFLUSHOPT | FeatureCLZERO |
FeatureCMPXCHG8B | FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT |
FeatureF16C | FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT |
FeatureMMX | FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT |
FeaturePRFCHW | FeatureRDRND | FeatureRDSEED | FeatureSAHF | FeatureSHA |
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 |
FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC |
FeatureXSAVEOPT | FeatureXSAVES;
constexpr FeatureBitset FeaturesZNVER2 = FeaturesZNVER1 | FeatureCLWB |
FeatureRDPID | FeatureRDPRU |
FeatureWBNOINVD;
static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 |
FeatureINVPCID | FeaturePKU |
FeatureVAES | FeatureVPCLMULQDQ;
static constexpr FeatureBitset FeaturesZNVER4 =
FeaturesZNVER3 | FeatureAVX512F | FeatureEVEX512 | FeatureAVX512CD |
FeatureAVX512DQ | FeatureAVX512BW | FeatureAVX512VL | FeatureAVX512IFMA |
FeatureAVX512VBMI | FeatureAVX512VBMI2 | FeatureAVX512VNNI |
FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
FeatureGFNI | FeatureSHSTK;
+static constexpr FeatureBitset FeaturesZNVER5 =
+ FeaturesZNVER4 | FeatureAVXVNNI | FeatureMOVDIRI | FeatureMOVDIR64B |
+ FeatureAVX512VP2INTERSECT | FeaturePREFETCHI;
+
// D151696 transplanted Mangling and OnlyForCPUDispatchSpecific from
// X86TargetParser.def to here. They are assigned in the following ways:
// 1. Copy the mangling from the original CPU_SPECIFIC macros. If there is
// none, assign '\0' by default, which means the CPU does not support the
// cpu_specific/dispatch feature.
// 2. Set OnlyForCPUDispatchSpecific to true if this CPU name was not
// listed here before, which means it doesn't support -march, -mtune and so on.
// FIXME: Remove OnlyForCPUDispatchSpecific after all CPUs here support both
// the cpu_dispatch/specific() feature and -march, -mtune, and so on.
// clang-format off
constexpr ProcInfo Processors[] = {
// Empty processor. Include X87 and CMPXCHG8 for backwards compatibility.
{ {""}, CK_None, ~0U, FeatureX87 | FeatureCMPXCHG8B, '\0', false },
{ {"generic"}, CK_None, ~0U, FeatureX87 | FeatureCMPXCHG8B | Feature64BIT, 'A', true },
// i386-generation processors.
{ {"i386"}, CK_i386, ~0U, FeatureX87, '\0', false },
// i486-generation processors.
{ {"i486"}, CK_i486, ~0U, FeatureX87, '\0', false },
{ {"winchip-c6"}, CK_WinChipC6, ~0U, FeaturesPentiumMMX, '\0', false },
{ {"winchip2"}, CK_WinChip2, ~0U, FeaturesPentiumMMX | FeaturePRFCHW, '\0', false },
{ {"c3"}, CK_C3, ~0U, FeaturesPentiumMMX | FeaturePRFCHW, '\0', false },
// i586-generation processors, P5 microarchitecture based.
{ {"i586"}, CK_i586, ~0U, FeatureX87 | FeatureCMPXCHG8B, '\0', false },
{ {"pentium"}, CK_Pentium, ~0U, FeatureX87 | FeatureCMPXCHG8B, 'B', false },
{ {"pentium-mmx"}, CK_PentiumMMX, ~0U, FeaturesPentiumMMX, '\0', false },
{ {"pentium_mmx"}, CK_PentiumMMX, ~0U, FeaturesPentiumMMX, 'D', true },
// i686-generation processors, P6 / Pentium M microarchitecture based.
{ {"pentiumpro"}, CK_PentiumPro, ~0U, FeatureCMOV | FeatureX87 | FeatureCMPXCHG8B, 'C', false },
{ {"pentium_pro"}, CK_PentiumPro, ~0U, FeatureCMOV | FeatureX87 | FeatureCMPXCHG8B, 'C', true },
{ {"i686"}, CK_i686, ~0U, FeatureCMOV | FeatureX87 | FeatureCMPXCHG8B, '\0', false },
{ {"pentium2"}, CK_Pentium2, ~0U, FeaturesPentium2, 'E', false },
{ {"pentium_ii"}, CK_Pentium2, ~0U, FeaturesPentium2, 'E', true },
{ {"pentium3"}, CK_Pentium3, ~0U, FeaturesPentium3, 'H', false },
{ {"pentium3m"}, CK_Pentium3, ~0U, FeaturesPentium3, 'H', false },
{ {"pentium_iii"}, CK_Pentium3, ~0U, FeaturesPentium3, 'H', true },
{ {"pentium_iii_no_xmm_regs"}, CK_Pentium3, ~0U, FeaturesPentium3, 'H', true },
{ {"pentium-m"}, CK_PentiumM, ~0U, FeaturesPentium4, '\0', false },
{ {"pentium_m"}, CK_PentiumM, ~0U, FeaturesPentium4, 'K', true },
{ {"c3-2"}, CK_C3_2, ~0U, FeaturesPentium3, '\0', false },
{ {"yonah"}, CK_Yonah, ~0U, FeaturesPrescott, 'L', false },
// Netburst microarchitecture based processors.
{ {"pentium4"}, CK_Pentium4, ~0U, FeaturesPentium4, 'J', false },
{ {"pentium4m"}, CK_Pentium4, ~0U, FeaturesPentium4, 'J', false },
{ {"pentium_4"}, CK_Pentium4, ~0U, FeaturesPentium4, 'J', true },
{ {"pentium_4_sse3"}, CK_Prescott, ~0U, FeaturesPrescott, 'L', true },
{ {"prescott"}, CK_Prescott, ~0U, FeaturesPrescott, 'L', false },
{ {"nocona"}, CK_Nocona, ~0U, FeaturesNocona, 'L', false },
// Core microarchitecture based processors.
{ {"core2"}, CK_Core2, FEATURE_SSSE3, FeaturesCore2, 'M', false },
{ {"core_2_duo_ssse3"}, CK_Core2, ~0U, FeaturesCore2, 'M', true },
{ {"penryn"}, CK_Penryn, ~0U, FeaturesPenryn, 'N', false },
{ {"core_2_duo_sse4_1"}, CK_Penryn, ~0U, FeaturesPenryn, 'N', true },
// Atom processors
{ {"bonnell"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell, 'O', false },
{ {"atom"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell, 'O', false },
{ {"silvermont"}, CK_Silvermont, FEATURE_SSE4_2, FeaturesSilvermont, 'c', false },
{ {"slm"}, CK_Silvermont, FEATURE_SSE4_2, FeaturesSilvermont, 'c', false },
{ {"atom_sse4_2"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem, 'c', true },
{ {"atom_sse4_2_movbe"}, CK_Goldmont, FEATURE_SSE4_2, FeaturesGoldmont, 'd', true },
{ {"goldmont"}, CK_Goldmont, FEATURE_SSE4_2, FeaturesGoldmont, 'i', false },
{ {"goldmont-plus"}, CK_GoldmontPlus, FEATURE_SSE4_2, FeaturesGoldmontPlus, '\0', false },
{ {"goldmont_plus"}, CK_GoldmontPlus, FEATURE_SSE4_2, FeaturesGoldmontPlus, 'd', true },
{ {"tremont"}, CK_Tremont, FEATURE_SSE4_2, FeaturesTremont, 'd', false },
// Nehalem microarchitecture based processors.
{ {"nehalem"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem, 'P', false },
{ {"core_i7_sse4_2"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem, 'P', true },
{ {"corei7"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem, 'P', false },
// Westmere microarchitecture based processors.
{ {"westmere"}, CK_Westmere, FEATURE_PCLMUL, FeaturesWestmere, 'Q', false },
{ {"core_aes_pclmulqdq"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem, 'Q', true },
// Sandy Bridge microarchitecture based processors.
{ {"sandybridge"}, CK_SandyBridge, FEATURE_AVX, FeaturesSandyBridge, 'R', false },
{ {"core_2nd_gen_avx"}, CK_SandyBridge, FEATURE_AVX, FeaturesSandyBridge, 'R', true },
{ {"corei7-avx"}, CK_SandyBridge, FEATURE_AVX, FeaturesSandyBridge, '\0', false },
// Ivy Bridge microarchitecture based processors.
{ {"ivybridge"}, CK_IvyBridge, FEATURE_AVX, FeaturesIvyBridge, 'S', false },
{ {"core_3rd_gen_avx"}, CK_IvyBridge, FEATURE_AVX, FeaturesIvyBridge, 'S', true },
{ {"core-avx-i"}, CK_IvyBridge, FEATURE_AVX, FeaturesIvyBridge, '\0', false },
// Haswell microarchitecture based processors.
{ {"haswell"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell, 'V', false },
{ {"core-avx2"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell, '\0', false },
{ {"core_4th_gen_avx"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell, 'V', true },
{ {"core_4th_gen_avx_tsx"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell, 'W', true },
// Broadwell microarchitecture based processors.
{ {"broadwell"}, CK_Broadwell, FEATURE_AVX2, FeaturesBroadwell, 'X', false },
{ {"core_5th_gen_avx"}, CK_Broadwell, FEATURE_AVX2, FeaturesBroadwell, 'X', true },
{ {"core_5th_gen_avx_tsx"}, CK_Broadwell, FEATURE_AVX2, FeaturesBroadwell, 'Y', true },
// Skylake client microarchitecture based processors.
{ {"skylake"}, CK_SkylakeClient, FEATURE_AVX2, FeaturesSkylakeClient, 'b', false },
// Skylake server microarchitecture based processors.
{ {"skylake-avx512"}, CK_SkylakeServer, FEATURE_AVX512F, FeaturesSkylakeServer, '\0', false },
{ {"skx"}, CK_SkylakeServer, FEATURE_AVX512F, FeaturesSkylakeServer, 'a', false },
{ {"skylake_avx512"}, CK_SkylakeServer, FEATURE_AVX512F, FeaturesSkylakeServer, 'a', true },
// Cascadelake Server microarchitecture based processors.
{ {"cascadelake"}, CK_Cascadelake, FEATURE_AVX512VNNI, FeaturesCascadeLake, 'o', false },
// Cooperlake Server microarchitecture based processors.
{ {"cooperlake"}, CK_Cooperlake, FEATURE_AVX512BF16, FeaturesCooperLake, 'f', false },
// Cannonlake client microarchitecture based processors.
{ {"cannonlake"}, CK_Cannonlake, FEATURE_AVX512VBMI, FeaturesCannonlake, 'e', false },
// Icelake client microarchitecture based processors.
{ {"icelake-client"}, CK_IcelakeClient, FEATURE_AVX512VBMI2, FeaturesICLClient, '\0', false },
{ {"icelake_client"}, CK_IcelakeClient, FEATURE_AVX512VBMI2, FeaturesICLClient, 'k', true },
// Rocketlake microarchitecture based processors.
{ {"rocketlake"}, CK_Rocketlake, FEATURE_AVX512VBMI2, FeaturesRocketlake, 'k', false },
// Icelake server microarchitecture based processors.
{ {"icelake-server"}, CK_IcelakeServer, FEATURE_AVX512VBMI2, FeaturesICLServer, '\0', false },
{ {"icelake_server"}, CK_IcelakeServer, FEATURE_AVX512VBMI2, FeaturesICLServer, 'k', true },
// Tigerlake microarchitecture based processors.
{ {"tigerlake"}, CK_Tigerlake, FEATURE_AVX512VP2INTERSECT, FeaturesTigerlake, 'l', false },
// Sapphire Rapids microarchitecture based processors.
{ {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
// Alderlake microarchitecture based processors.
{ {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Raptorlake microarchitecture based processors.
{ {"raptorlake"}, CK_Raptorlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Meteorlake microarchitecture based processors.
{ {"meteorlake"}, CK_Meteorlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Arrowlake microarchitecture based processors.
{ {"arrowlake"}, CK_Arrowlake, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
{ {"arrowlake-s"}, CK_ArrowlakeS, FEATURE_AVX2, FeaturesArrowlakeS, '\0', false },
{ {"arrowlake_s"}, CK_ArrowlakeS, FEATURE_AVX2, FeaturesArrowlakeS, 'p', true },
// Lunarlake microarchitecture based processors.
{ {"lunarlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesArrowlakeS, 'p', false },
// Gracemont microarchitecture based processors.
{ {"gracemont"}, CK_Gracemont, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Pantherlake microarchitecture based processors.
{ {"pantherlake"}, CK_Lunarlake, FEATURE_AVX2, FeaturesPantherlake, 'p', false },
// Sierraforest microarchitecture based processors.
{ {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Grandridge microarchitecture based processors.
{ {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Granite Rapids microarchitecture based processors.
{ {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512FP16, FeaturesGraniteRapids, 'n', false },
// Granite Rapids D microarchitecture based processors.
{ {"graniterapids-d"}, CK_GraniterapidsD, FEATURE_AVX512FP16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, '\0', false },
{ {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512FP16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true },
// Emerald Rapids microarchitecture based processors.
{ {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
// Clearwaterforest microarchitecture based processors.
{ {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false },
// Knights Landing processor.
{ {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', false },
{ {"mic_avx512"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', true },
// Knights Mill processor.
{ {"knm"}, CK_KNM, FEATURE_AVX5124FMAPS, FeaturesKNM, 'j', false },
// Lakemont microarchitecture based processors.
{ {"lakemont"}, CK_Lakemont, ~0U, FeatureCMPXCHG8B, '\0', false },
// K6 architecture processors.
{ {"k6"}, CK_K6, ~0U, FeaturesK6, '\0', false },
{ {"k6-2"}, CK_K6_2, ~0U, FeaturesK6 | FeaturePRFCHW, '\0', false },
{ {"k6-3"}, CK_K6_3, ~0U, FeaturesK6 | FeaturePRFCHW, '\0', false },
// K7 architecture processors.
{ {"athlon"}, CK_Athlon, ~0U, FeaturesAthlon, '\0', false },
{ {"athlon-tbird"}, CK_Athlon, ~0U, FeaturesAthlon, '\0', false },
{ {"athlon-xp"}, CK_AthlonXP, ~0U, FeaturesAthlonXP, '\0', false },
{ {"athlon-mp"}, CK_AthlonXP, ~0U, FeaturesAthlonXP, '\0', false },
{ {"athlon-4"}, CK_AthlonXP, ~0U, FeaturesAthlonXP, '\0', false },
// K8 architecture processors.
{ {"k8"}, CK_K8, ~0U, FeaturesK8, '\0', false },
{ {"athlon64"}, CK_K8, ~0U, FeaturesK8, '\0', false },
{ {"athlon-fx"}, CK_K8, ~0U, FeaturesK8, '\0', false },
{ {"opteron"}, CK_K8, ~0U, FeaturesK8, '\0', false },
{ {"k8-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3, '\0', false },
{ {"athlon64-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3, '\0', false },
{ {"opteron-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3, '\0', false },
{ {"amdfam10"}, CK_AMDFAM10, FEATURE_SSE4_A, FeaturesAMDFAM10, '\0', false },
{ {"barcelona"}, CK_AMDFAM10, FEATURE_SSE4_A, FeaturesAMDFAM10, '\0', false },
// Bobcat architecture processors.
{ {"btver1"}, CK_BTVER1, FEATURE_SSE4_A, FeaturesBTVER1, '\0', false },
{ {"btver2"}, CK_BTVER2, FEATURE_BMI, FeaturesBTVER2, '\0', false },
// Bulldozer architecture processors.
{ {"bdver1"}, CK_BDVER1, FEATURE_XOP, FeaturesBDVER1, '\0', false },
{ {"bdver2"}, CK_BDVER2, FEATURE_FMA, FeaturesBDVER2, '\0', false },
{ {"bdver3"}, CK_BDVER3, FEATURE_FMA, FeaturesBDVER3, '\0', false },
{ {"bdver4"}, CK_BDVER4, FEATURE_AVX2, FeaturesBDVER4, '\0', false },
// Zen architecture processors.
{ {"znver1"}, CK_ZNVER1, FEATURE_AVX2, FeaturesZNVER1, '\0', false },
{ {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2, '\0', false },
{ {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
{ {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
+ { {"znver5"}, CK_ZNVER5, FEATURE_AVX512VP2INTERSECT, FeaturesZNVER5, '\0', false },
// Generic 64-bit processor.
{ {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false },
{ {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false },
{ {"x86-64-v3"}, CK_x86_64_v3, FEATURE_AVX2, FeaturesX86_64_V3, '\0', false },
{ {"x86-64-v4"}, CK_x86_64_v4, FEATURE_AVX512VL, FeaturesX86_64_V4, '\0', false },
// Geode processors.
{ {"geode"}, CK_Geode, ~0U, FeaturesGeode, '\0', false },
};
// clang-format on
constexpr const char *NoTuneList[] = {"x86-64-v2", "x86-64-v3", "x86-64-v4"};
X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) {
for (const auto &P : Processors)
if (!P.OnlyForCPUDispatchSpecific && P.Name == CPU &&
(P.Features[FEATURE_64BIT] || !Only64Bit))
return P.Kind;
return CK_None;
}
X86::CPUKind llvm::X86::parseTuneCPU(StringRef CPU, bool Only64Bit) {
if (llvm::is_contained(NoTuneList, CPU))
return CK_None;
return parseArchX86(CPU, Only64Bit);
}
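// Usage sketch (illustrative, derived from the tables above):
// parseArchX86("znver5", /*Only64Bit=*/true) returns CK_ZNVER5, while
// parseTuneCPU("x86-64-v3", /*Only64Bit=*/true) returns CK_None because
// the generic micro-architecture levels are listed in NoTuneList.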
void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit) {
for (const auto &P : Processors)
if (!P.OnlyForCPUDispatchSpecific && !P.Name.empty() &&
(P.Features[FEATURE_64BIT] || !Only64Bit))
Values.emplace_back(P.Name);
}
void llvm::X86::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit) {
for (const ProcInfo &P : Processors)
if (!P.OnlyForCPUDispatchSpecific && !P.Name.empty() &&
(P.Features[FEATURE_64BIT] || !Only64Bit) &&
!llvm::is_contained(NoTuneList, P.Name))
Values.emplace_back(P.Name);
}
ProcessorFeatures llvm::X86::getKeyFeature(X86::CPUKind Kind) {
// FIXME: Can we avoid a linear search here? The table might be sorted by
// CPUKind so we could binary search?
for (const auto &P : Processors) {
if (P.Kind == Kind) {
assert(P.KeyFeature != ~0U && "Processor does not have a key feature.");
return static_cast<ProcessorFeatures>(P.KeyFeature);
}
}
llvm_unreachable("Unable to find CPU kind!");
}
// Features with no dependencies.
constexpr FeatureBitset ImpliedFeatures64BIT = {};
constexpr FeatureBitset ImpliedFeaturesADX = {};
constexpr FeatureBitset ImpliedFeaturesBMI = {};
constexpr FeatureBitset ImpliedFeaturesBMI2 = {};
constexpr FeatureBitset ImpliedFeaturesCLDEMOTE = {};
constexpr FeatureBitset ImpliedFeaturesCLFLUSHOPT = {};
constexpr FeatureBitset ImpliedFeaturesCLWB = {};
constexpr FeatureBitset ImpliedFeaturesCLZERO = {};
constexpr FeatureBitset ImpliedFeaturesCMOV = {};
constexpr FeatureBitset ImpliedFeaturesCMPXCHG16B = {};
constexpr FeatureBitset ImpliedFeaturesCMPXCHG8B = {};
constexpr FeatureBitset ImpliedFeaturesCRC32 = {};
constexpr FeatureBitset ImpliedFeaturesENQCMD = {};
constexpr FeatureBitset ImpliedFeaturesFSGSBASE = {};
constexpr FeatureBitset ImpliedFeaturesFXSR = {};
constexpr FeatureBitset ImpliedFeaturesINVPCID = {};
constexpr FeatureBitset ImpliedFeaturesLWP = {};
constexpr FeatureBitset ImpliedFeaturesLZCNT = {};
constexpr FeatureBitset ImpliedFeaturesMMX = {};
constexpr FeatureBitset ImpliedFeaturesMWAITX = {};
constexpr FeatureBitset ImpliedFeaturesMOVBE = {};
constexpr FeatureBitset ImpliedFeaturesMOVDIR64B = {};
constexpr FeatureBitset ImpliedFeaturesMOVDIRI = {};
constexpr FeatureBitset ImpliedFeaturesPCONFIG = {};
constexpr FeatureBitset ImpliedFeaturesPOPCNT = {};
constexpr FeatureBitset ImpliedFeaturesPKU = {};
constexpr FeatureBitset ImpliedFeaturesPRFCHW = {};
constexpr FeatureBitset ImpliedFeaturesPTWRITE = {};
constexpr FeatureBitset ImpliedFeaturesRDPID = {};
constexpr FeatureBitset ImpliedFeaturesRDPRU = {};
constexpr FeatureBitset ImpliedFeaturesRDRND = {};
constexpr FeatureBitset ImpliedFeaturesRDSEED = {};
constexpr FeatureBitset ImpliedFeaturesRTM = {};
constexpr FeatureBitset ImpliedFeaturesSAHF = {};
constexpr FeatureBitset ImpliedFeaturesSERIALIZE = {};
constexpr FeatureBitset ImpliedFeaturesSGX = {};
constexpr FeatureBitset ImpliedFeaturesSHSTK = {};
constexpr FeatureBitset ImpliedFeaturesTBM = {};
constexpr FeatureBitset ImpliedFeaturesTSXLDTRK = {};
constexpr FeatureBitset ImpliedFeaturesUINTR = {};
constexpr FeatureBitset ImpliedFeaturesUSERMSR = {};
constexpr FeatureBitset ImpliedFeaturesWAITPKG = {};
constexpr FeatureBitset ImpliedFeaturesWBNOINVD = {};
constexpr FeatureBitset ImpliedFeaturesVZEROUPPER = {};
constexpr FeatureBitset ImpliedFeaturesX87 = {};
constexpr FeatureBitset ImpliedFeaturesXSAVE = {};
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE1 = {};
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE2 = {};
// Not really CPU features, but need to be in the table because clang uses
// target features to communicate them to the backend.
constexpr FeatureBitset ImpliedFeaturesRETPOLINE_EXTERNAL_THUNK = {};
constexpr FeatureBitset ImpliedFeaturesRETPOLINE_INDIRECT_BRANCHES = {};
constexpr FeatureBitset ImpliedFeaturesRETPOLINE_INDIRECT_CALLS = {};
constexpr FeatureBitset ImpliedFeaturesLVI_CFI = {};
constexpr FeatureBitset ImpliedFeaturesLVI_LOAD_HARDENING = {};
// XSAVE features are dependent on basic XSAVE.
constexpr FeatureBitset ImpliedFeaturesXSAVEC = FeatureXSAVE;
constexpr FeatureBitset ImpliedFeaturesXSAVEOPT = FeatureXSAVE;
constexpr FeatureBitset ImpliedFeaturesXSAVES = FeatureXSAVE;
// SSE/AVX/AVX512F chain.
constexpr FeatureBitset ImpliedFeaturesSSE = {};
constexpr FeatureBitset ImpliedFeaturesSSE2 = FeatureSSE;
constexpr FeatureBitset ImpliedFeaturesSSE3 = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesSSSE3 = FeatureSSE3;
constexpr FeatureBitset ImpliedFeaturesSSE4_1 = FeatureSSSE3;
constexpr FeatureBitset ImpliedFeaturesSSE4_2 = FeatureSSE4_1;
constexpr FeatureBitset ImpliedFeaturesAVX = FeatureSSE4_2;
constexpr FeatureBitset ImpliedFeaturesAVX2 = FeatureAVX;
constexpr FeatureBitset ImpliedFeaturesEVEX512 = {};
constexpr FeatureBitset ImpliedFeaturesAVX512F =
FeatureAVX2 | FeatureF16C | FeatureFMA;
// Vector extensions that build on SSE or AVX.
constexpr FeatureBitset ImpliedFeaturesAES = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesF16C = FeatureAVX;
constexpr FeatureBitset ImpliedFeaturesFMA = FeatureAVX;
constexpr FeatureBitset ImpliedFeaturesGFNI = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesPCLMUL = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesSHA = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesVAES = FeatureAES | FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesVPCLMULQDQ = FeatureAVX | FeaturePCLMUL;
constexpr FeatureBitset ImpliedFeaturesSM3 = FeatureAVX;
constexpr FeatureBitset ImpliedFeaturesSM4 = FeatureAVX2;
// AVX512 features.
constexpr FeatureBitset ImpliedFeaturesAVX512CD = FeatureAVX512F;
constexpr FeatureBitset ImpliedFeaturesAVX512BW = FeatureAVX512F;
constexpr FeatureBitset ImpliedFeaturesAVX512DQ = FeatureAVX512F;
constexpr FeatureBitset ImpliedFeaturesAVX512VL = FeatureAVX512F;
constexpr FeatureBitset ImpliedFeaturesAVX512BF16 = FeatureAVX512BW;
constexpr FeatureBitset ImpliedFeaturesAVX512BITALG = FeatureAVX512BW;
constexpr FeatureBitset ImpliedFeaturesAVX512IFMA = FeatureAVX512F;
constexpr FeatureBitset ImpliedFeaturesAVX512VNNI = FeatureAVX512F;
constexpr FeatureBitset ImpliedFeaturesAVX512VPOPCNTDQ = FeatureAVX512F;
constexpr FeatureBitset ImpliedFeaturesAVX512VBMI = FeatureAVX512BW;
constexpr FeatureBitset ImpliedFeaturesAVX512VBMI2 = FeatureAVX512BW;
constexpr FeatureBitset ImpliedFeaturesAVX512VP2INTERSECT = FeatureAVX512F;
// FIXME: These two aren't really implemented and just exist in the feature
// list for __builtin_cpu_supports. So omit their dependencies.
constexpr FeatureBitset ImpliedFeaturesAVX5124FMAPS = {};
constexpr FeatureBitset ImpliedFeaturesAVX5124VNNIW = {};
// SSE4_A->FMA4->XOP chain.
constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSE3;
constexpr FeatureBitset ImpliedFeaturesFMA4 = FeatureAVX | FeatureSSE4_A;
constexpr FeatureBitset ImpliedFeaturesXOP = FeatureFMA4;
// AMX Features
constexpr FeatureBitset ImpliedFeaturesAMX_TILE = {};
constexpr FeatureBitset ImpliedFeaturesAMX_BF16 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_FP16 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_COMPLEX = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesHRESET = {};
constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {};
constexpr FeatureBitset ImpliedFeaturesCMPCCXADD = {};
constexpr FeatureBitset ImpliedFeaturesRAOINT = {};
constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT16 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesSHA512 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
// Key Locker Features
constexpr FeatureBitset ImpliedFeaturesKL = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL;
// AVXVNNI Features
constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2;
// AVX10 Features
constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
FeatureAVX512CD | FeatureAVX512VBMI | FeatureAVX512IFMA |
FeatureAVX512VNNI | FeatureAVX512BF16 | FeatureAVX512VPOPCNTDQ |
FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureVAES | FeatureVPCLMULQDQ |
FeatureAVX512FP16;
constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 =
FeatureAVX10_1 | FeatureEVEX512;
// APX Features
constexpr FeatureBitset ImpliedFeaturesEGPR = {};
constexpr FeatureBitset ImpliedFeaturesPush2Pop2 = {};
constexpr FeatureBitset ImpliedFeaturesPPX = {};
constexpr FeatureBitset ImpliedFeaturesNDD = {};
constexpr FeatureBitset ImpliedFeaturesCCMP = {};
constexpr FeatureBitset ImpliedFeaturesNF = {};
constexpr FeatureBitset ImpliedFeaturesCF = {};
constexpr FeatureBitset ImpliedFeaturesZU = {};
constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = {
#define X86_FEATURE(ENUM, STR) {{"+" STR}, ImpliedFeatures##ENUM},
#include "llvm/TargetParser/X86TargetParser.def"
};
void llvm::X86::getFeaturesForCPU(StringRef CPU,
SmallVectorImpl<StringRef> &EnabledFeatures,
bool NeedPlus) {
auto I = llvm::find_if(Processors,
[&](const ProcInfo &P) { return P.Name == CPU; });
assert(I != std::end(Processors) && "Processor not found!");
FeatureBitset Bits = I->Features;
// Remove the 64-bit feature which we only use to validate whether a CPU can
// be used in 64-bit mode.
Bits &= ~Feature64BIT;
// Add the string version of all set bits.
for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
if (Bits[i] && !FeatureInfos[i].getName(NeedPlus).empty())
EnabledFeatures.push_back(FeatureInfos[i].getName(NeedPlus));
}
// For each feature that is (transitively) implied by this feature, set it.
static void getImpliedEnabledFeatures(FeatureBitset &Bits,
const FeatureBitset &Implies) {
// Fast path: Implies is often empty.
if (!Implies.any())
return;
FeatureBitset Prev;
Bits |= Implies;
do {
Prev = Bits;
for (unsigned i = CPU_FEATURE_MAX; i;)
if (Bits[--i])
Bits |= FeatureInfos[i].ImpliedFeatures;
} while (Prev != Bits);
}
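// Worked example (a sketch based on the chains above): enabling AVX2 first
// ORs in its direct implication FeatureAVX; each pass of the fixpoint loop
// then ORs in the ImpliedFeatures of every bit already set, transitively
// pulling in SSE4_2, SSE4_1, SSSE3, SSE3, SSE2 and SSE until the bitset
// stops changing.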
/// Create bit vector of features that are implied disabled if the feature
/// passed in Value is disabled.
static void getImpliedDisabledFeatures(FeatureBitset &Bits, unsigned Value) {
// Check all features looking for any that depend on this feature. If we find
// one, mark it and recursively find any features that depend on it.
FeatureBitset Prev;
Bits.set(Value);
do {
Prev = Bits;
for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
if ((FeatureInfos[i].ImpliedFeatures & Bits).any())
Bits.set(i);
} while (Prev != Bits);
}
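// Worked example (a sketch): disabling AVX first marks every feature whose
// ImpliedFeatures contain AVX (e.g. AVX2, F16C, FMA); subsequent passes
// then transitively mark AVX512F and everything layered on top of it.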
void llvm::X86::updateImpliedFeatures(
StringRef Feature, bool Enabled,
StringMap<bool> &Features) {
auto I = llvm::find_if(FeatureInfos, [&](const FeatureInfo &FI) {
return FI.getName() == Feature;
});
if (I == std::end(FeatureInfos)) {
// FIXME: This shouldn't happen, but may not have all features in the table
// yet.
return;
}
FeatureBitset ImpliedBits;
if (Enabled)
getImpliedEnabledFeatures(ImpliedBits, I->ImpliedFeatures);
else
getImpliedDisabledFeatures(ImpliedBits,
std::distance(std::begin(FeatureInfos), I));
// Update the map entry for all implied features.
for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
if (ImpliedBits[i] && !FeatureInfos[i].getName().empty())
Features[FeatureInfos[i].getName()] = Enabled;
}
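// Usage sketch (illustrative; feature strings come from
// X86TargetParser.def): updateImpliedFeatures("avx2", /*Enabled=*/true,
// Features) sets the map entries for everything AVX2 transitively implies
// ("avx", "sse4.2", ...), while updateImpliedFeatures("avx", false,
// Features) clears "avx" itself plus every feature that depends on it.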
char llvm::X86::getCPUDispatchMangling(StringRef CPU) {
auto I = llvm::find_if(Processors,
[&](const ProcInfo &P) { return P.Name == CPU; });
assert(I != std::end(Processors) && "Processor not found!");
assert(I->Mangling != '\0' && "Processor doesn't support function multiversioning!");
return I->Mangling;
}
bool llvm::X86::validateCPUSpecificCPUDispatch(StringRef Name) {
auto I = llvm::find_if(Processors,
[&](const ProcInfo &P) { return P.Name == Name; });
return I != std::end(Processors);
}
std::array<uint32_t, 4>
llvm::X86::getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
// Processor features and mapping to processor feature value.
std::array<uint32_t, 4> FeatureMask{};
for (StringRef FeatureStr : FeatureStrs) {
unsigned Feature = StringSwitch<unsigned>(FeatureStr)
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \
.Case(STR, llvm::X86::FEATURE_##ENUM)
#define X86_MICROARCH_LEVEL(ENUM, STR, PRIORITY) \
.Case(STR, llvm::X86::FEATURE_##ENUM)
#include "llvm/TargetParser/X86TargetParser.def"
;
assert(Feature / 32 < FeatureMask.size());
FeatureMask[Feature / 32] |= 1U << (Feature % 32);
}
return FeatureMask;
}
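// Worked example (a sketch): a feature with enum value 37 lands in word
// 37 / 32 == 1 of the returned array, at bit 37 % 32 == 5; the four 32-bit
// words together cover the whole feature enum for __builtin_cpu_supports.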
unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) {
#ifndef NDEBUG
// Check that priorities are set properly in the .def file. We expect that
// "compat" features are assigned non-duplicate consecutive priorities
// starting from one (1, ..., 37), with the remaining entries being zero.
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) PRIORITY,
unsigned Priorities[] = {
#include "llvm/TargetParser/X86TargetParser.def"
};
std::array<unsigned, std::size(Priorities)> HelperList;
const size_t MaxPriority = 37;
std::iota(HelperList.begin(), HelperList.begin() + MaxPriority + 1, 0);
for (size_t i = MaxPriority + 1; i != std::size(Priorities); ++i)
HelperList[i] = 0;
assert(std::is_permutation(HelperList.begin(), HelperList.end(),
std::begin(Priorities), std::end(Priorities)) &&
"Priorities don't form consecutive range!");
#endif
switch (Feat) {
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \
case X86::FEATURE_##ENUM: \
return PRIORITY;
#include "llvm/TargetParser/X86TargetParser.def"
default:
llvm_unreachable("No Feature Priority for non-CPUSupports Features");
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cca9eeebaa53..ab2b96cdc42d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1,19181 +1,19187 @@
//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
// stores that can be put together into vector-stores. Next, it attempts to
// construct a vectorizable tree using the use-def chains. If a profitable tree
// was found, the SLP vectorizer performs vectorization on the tree.
//
// The pass is inspired by the work described in the paper:
// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/Verifier.h"
#endif
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <utility>
using namespace llvm;
using namespace llvm::PatternMatch;
using namespace slpvectorizer;
#define SV_NAME "slp-vectorizer"
#define DEBUG_TYPE "SLP"
STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
static cl::opt<bool>
SLPReVec("slp-revec", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization for wider vector utilization"));
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
"number "));
static cl::opt<bool> SLPSkipEarlyProfitabilityCheck(
"slp-skip-early-profitability-check", cl::init(false), cl::Hidden,
cl::desc("When true, SLP vectorizer bypasses profitability checks based on "
"heuristics and makes vectorization decision via cost modeling."));
static cl::opt<bool>
ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
cl::desc("Attempt to vectorize horizontal reductions"));
static cl::opt<bool> ShouldStartVectorizeHorAtStore(
"slp-vectorize-hor-store", cl::init(false), cl::Hidden,
cl::desc(
"Attempt to vectorize horizontal reductions feeding into a store"));
// NOTE: If AllowHorRdxIdenityOptimization is true, the optimization will run
// even if we match a reduction but do not vectorize in the end.
static cl::opt<bool> AllowHorRdxIdenityOptimization(
"slp-optimize-identity-hor-reduction-ops", cl::init(true), cl::Hidden,
cl::desc("Allow optimization of original scalar identity operations on "
"matched horizontal reductions."));
static cl::opt<int>
MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
static cl::opt<unsigned>
MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
/// Limits the size of scheduling regions in a block.
/// It avoids long compile times for _very_ large blocks where vector
/// instructions are spread over a wide range.
/// This limit is way higher than needed by real-world functions.
static cl::opt<int>
ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
cl::desc("Limit the size of the SLP scheduling region per block"));
static cl::opt<int> MinVectorRegSizeOption(
"slp-min-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
static cl::opt<unsigned> RecursionMaxDepth(
"slp-recursion-max-depth", cl::init(12), cl::Hidden,
cl::desc("Limit the recursion depth when building a vectorizable tree"));
static cl::opt<unsigned> MinTreeSize(
"slp-min-tree-size", cl::init(3), cl::Hidden,
cl::desc("Only vectorize small trees if they are fully vectorizable"));
// The maximum depth that the look-ahead score heuristic will explore.
// The higher this value, the higher the compilation time overhead.
static cl::opt<int> LookAheadMaxDepth(
"slp-max-look-ahead-depth", cl::init(2), cl::Hidden,
cl::desc("The maximum look-ahead depth for operand reordering scores"));
// The maximum depth that the look-ahead score heuristic will explore
// when probing among candidates for vectorization tree roots.
// The higher this value, the higher the compilation time overhead, but unlike
// the similar limit for operand reordering this is used less frequently, so
// the impact of a higher value is less noticeable.
static cl::opt<int> RootLookAheadMaxDepth(
"slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden,
cl::desc("The maximum look-ahead depth for searching best rooting option"));
static cl::opt<unsigned> MinProfitableStridedLoads(
"slp-min-strided-loads", cl::init(2), cl::Hidden,
cl::desc("The minimum number of loads, which should be considered strided, "
"if the stride is > 1 or is runtime value"));
static cl::opt<unsigned> MaxProfitableLoadStride(
"slp-max-stride", cl::init(8), cl::Hidden,
cl::desc("The maximum stride, considered to be profitable."));
static cl::opt<bool>
ViewSLPTree("view-slp-tree", cl::Hidden,
cl::desc("Display the SLP trees with Graphviz"));
static cl::opt<bool> VectorizeNonPowerOf2(
"slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden,
cl::desc("Try to vectorize with non-power-of-2 number of elements."));
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
// Limit on the number of uses for potentially transformed instructions/values,
// used in checks to avoid compile-time explosion.
static constexpr int UsesLimit = 64;
// Another limit for the alias checks: The maximum distance between load/store
// instructions where alias checks are done.
// This limit is useful for very large basic blocks.
static const unsigned MaxMemDepDistance = 160;
/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
/// regions to be handled.
static const int MinScheduleRegionSize = 16;
/// Maximum allowed number of operands in the PHI nodes.
static const unsigned MaxPHINumOperands = 128;
/// Predicate for the element types that the SLP vectorizer supports.
///
/// The most important thing to filter here are types which are invalid in LLVM
/// vectors. We also filter target specific types which have absolutely no
/// meaningful vectorization path such as x86_fp80 and ppc_f128. This just
/// avoids spending time checking the cost model and realizing that they will
/// be inevitably scalarized.
static bool isValidElementType(Type *Ty) {
// TODO: Support ScalableVectorType.
if (SLPReVec && isa<FixedVectorType>(Ty))
Ty = Ty->getScalarType();
return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
!Ty->isPPC_FP128Ty();
}
/// \returns the number of elements for Ty.
static unsigned getNumElements(Type *Ty) {
assert(!isa<ScalableVectorType>(Ty) &&
"ScalableVectorType is not supported.");
if (auto *VecTy = dyn_cast<FixedVectorType>(Ty))
return VecTy->getNumElements();
return 1;
}
/// \returns the vector type of ScalarTy based on vectorization factor.
static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) {
return FixedVectorType::get(ScalarTy->getScalarType(),
VF * getNumElements(ScalarTy));
}
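// Worked example (a sketch): getWidenedType(float, 4) yields <4 x float>;
// with ReVec, getWidenedType(<2 x i32>, 4) yields <8 x i32>, since the
// element count is VF * getNumElements(ScalarTy) == 4 * 2.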
/// \returns True if the value is a constant (but not globals/constant
/// expressions).
static bool isConstant(Value *V) {
return isa<Constant>(V) && !isa<ConstantExpr, GlobalValue>(V);
}
/// Checks if \p V is one of the vector-like instructions, i.e. undef,
/// insertelement/extractelement with constant indices for a fixed vector type,
/// or an extractvalue instruction.
static bool isVectorLikeInstWithConstOps(Value *V) {
if (!isa<InsertElementInst, ExtractElementInst>(V) &&
!isa<ExtractValueInst, UndefValue>(V))
return false;
auto *I = dyn_cast<Instruction>(V);
if (!I || isa<ExtractValueInst>(I))
return true;
if (!isa<FixedVectorType>(I->getOperand(0)->getType()))
return false;
if (isa<ExtractElementInst>(I))
return isConstant(I->getOperand(1));
assert(isa<InsertElementInst>(V) && "Expected only insertelement.");
return isConstant(I->getOperand(2));
}
/// Returns the power-of-2 number of elements in a single register (part),
/// given the total number of elements \p Size and the number of registers
/// (parts) \p NumParts.
static unsigned getPartNumElems(unsigned Size, unsigned NumParts) {
return PowerOf2Ceil(divideCeil(Size, NumParts));
}
/// Returns the correct remaining number of elements, given the total number
/// \p Size, the (power-of-2) number of elements in a single register
/// \p PartNumElems, and the current register (part) \p Part.
static unsigned getNumElems(unsigned Size, unsigned PartNumElems,
unsigned Part) {
return std::min<unsigned>(PartNumElems, Size - Part * PartNumElems);
}
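// Worked example (a sketch): with Size == 10 and NumParts == 3,
// getPartNumElems returns PowerOf2Ceil(divideCeil(10, 3)) == 4, so parts 0
// and 1 hold 4 elements each and getNumElems(10, 4, /*Part=*/2) returns
// std::min(4u, 10 - 2 * 4) == 2 for the last part.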
#if !defined(NDEBUG)
/// Print a short descriptor of the instruction bundle suitable for debug output.
static std::string shortBundleName(ArrayRef<Value *> VL) {
std::string Result;
raw_string_ostream OS(Result);
OS << "n=" << VL.size() << " [" << *VL.front() << ", ..]";
OS.flush();
return Result;
}
#endif
/// \returns true if all of the instructions in \p VL are in the same block or
/// false otherwise.
static bool allSameBlock(ArrayRef<Value *> VL) {
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)
return false;
if (all_of(VL, isVectorLikeInstWithConstOps))
return true;
BasicBlock *BB = I0->getParent();
for (int I = 1, E = VL.size(); I < E; I++) {
auto *II = dyn_cast<Instruction>(VL[I]);
if (!II)
return false;
if (BB != II->getParent())
return false;
}
return true;
}
/// \returns True if all of the values in \p VL are constants (but not
/// globals/constant expressions).
static bool allConstant(ArrayRef<Value *> VL) {
// Constant expressions and globals can't be vectorized like normal integer/FP
// constants.
return all_of(VL, isConstant);
}
/// \returns True if all of the values in \p VL are identical or some of them
/// are UndefValue.
static bool isSplat(ArrayRef<Value *> VL) {
Value *FirstNonUndef = nullptr;
for (Value *V : VL) {
if (isa<UndefValue>(V))
continue;
if (!FirstNonUndef) {
FirstNonUndef = V;
continue;
}
if (V != FirstNonUndef)
return false;
}
return FirstNonUndef != nullptr;
}
/// \returns True if \p I is commutative, handles CmpInst and BinaryOperator.
static bool isCommutative(Instruction *I) {
if (auto *Cmp = dyn_cast<CmpInst>(I))
return Cmp->isCommutative();
if (auto *BO = dyn_cast<BinaryOperator>(I))
return BO->isCommutative() ||
(BO->getOpcode() == Instruction::Sub &&
!BO->hasNUsesOrMore(UsesLimit) &&
all_of(
BO->uses(),
[](const Use &U) {
// Commutative, if icmp eq/ne sub, 0
ICmpInst::Predicate Pred;
if (match(U.getUser(),
m_ICmp(Pred, m_Specific(U.get()), m_Zero())) &&
(Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE))
return true;
// Commutative, if abs(sub nsw, true) or abs(sub, false).
ConstantInt *Flag;
return match(U.getUser(),
m_Intrinsic<Intrinsic::abs>(
m_Specific(U.get()), m_ConstantInt(Flag))) &&
(!cast<Instruction>(U.get())->hasNoSignedWrap() ||
Flag->isOne());
})) ||
(BO->getOpcode() == Instruction::FSub &&
!BO->hasNUsesOrMore(UsesLimit) &&
all_of(BO->uses(), [](const Use &U) {
return match(U.getUser(),
m_Intrinsic<Intrinsic::fabs>(m_Specific(U.get())));
}));
return I->isCommutative();
}
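// For illustration (a sketch): a subtraction whose only uses look like
//   %d = sub i32 %a, %b
//   %c = icmp eq i32 %d, 0
// is treated as commutative above, because x - y == 0 iff y - x == 0; the
// analogous FSub case covers fsub values consumed only by @llvm.fabs.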
template <typename T>
static std::optional<unsigned> getInsertExtractIndex(const Value *Inst,
unsigned Offset) {
static_assert(std::is_same_v<T, InsertElementInst> ||
std::is_same_v<T, ExtractElementInst>,
"unsupported T");
int Index = Offset;
if (const auto *IE = dyn_cast<T>(Inst)) {
const auto *VT = dyn_cast<FixedVectorType>(IE->getType());
if (!VT)
return std::nullopt;
const auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
if (!CI)
return std::nullopt;
if (CI->getValue().uge(VT->getNumElements()))
return std::nullopt;
Index *= VT->getNumElements();
Index += CI->getZExtValue();
return Index;
}
return std::nullopt;
}
/// \returns inserting or extracting index of InsertElement, ExtractElement or
/// InsertValue instruction, using Offset as base offset for index.
/// \returns std::nullopt if the index is not an immediate.
static std::optional<unsigned> getElementIndex(const Value *Inst,
unsigned Offset = 0) {
if (auto Index = getInsertExtractIndex<InsertElementInst>(Inst, Offset))
return Index;
if (auto Index = getInsertExtractIndex<ExtractElementInst>(Inst, Offset))
return Index;
int Index = Offset;
const auto *IV = dyn_cast<InsertValueInst>(Inst);
if (!IV)
return std::nullopt;
Type *CurrentType = IV->getType();
for (unsigned I : IV->indices()) {
if (const auto *ST = dyn_cast<StructType>(CurrentType)) {
Index *= ST->getNumElements();
CurrentType = ST->getElementType(I);
} else if (const auto *AT = dyn_cast<ArrayType>(CurrentType)) {
Index *= AT->getNumElements();
CurrentType = AT->getElementType();
} else {
return std::nullopt;
}
Index += I;
}
return Index;
}
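// Worked example (a sketch): for
//   %r = insertvalue { [2 x i32], i32 } %agg, i32 %v, 0, 1
// the loop flattens the indices: the struct step multiplies Index by its 2
// members and adds 0, the array step multiplies by its 2 elements and adds
// 1, producing the flattened element index 1.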
namespace {
/// Specifies the way the mask should be analyzed for undefs/poisonous elements
/// in the shuffle mask.
enum class UseMask {
FirstArg, ///< The mask is expected to be for permutation of 1-2 vectors,
///< check for the mask elements for the first argument (mask
///< indices are in range [0:VF)).
SecondArg, ///< The mask is expected to be for permutation of 2 vectors, check
///< for the mask elements for the second argument (mask indices
///< are in range [VF:2*VF))
UndefsAsMask ///< Consider undef mask elements (-1) as placeholders for
///< future shuffle elements and mark them as used. Non-undef
///< elements are considered unused since they're already
///< marked as used in the mask.
};
} // namespace
/// Prepares a use bitset for the given mask either for the first argument or
/// for the second.
static SmallBitVector buildUseMask(int VF, ArrayRef<int> Mask,
UseMask MaskArg) {
SmallBitVector UseMask(VF, true);
for (auto [Idx, Value] : enumerate(Mask)) {
if (Value == PoisonMaskElem) {
if (MaskArg == UseMask::UndefsAsMask)
UseMask.reset(Idx);
continue;
}
if (MaskArg == UseMask::FirstArg && Value < VF)
UseMask.reset(Value);
else if (MaskArg == UseMask::SecondArg && Value >= VF)
UseMask.reset(Value - VF);
}
return UseMask;
}
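// Worked example (a sketch): with VF == 4, Mask == {0, 5, -1, 3} and
// UseMask::FirstArg, bits 0 and 3 are cleared (those lanes of the first
// vector are used), index 5 is ignored because it addresses the second
// vector, and the poison element leaves its bit set.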
/// Checks if the given value is actually an undefined constant vector.
/// Also, if the \p UseMask is not empty, tries to check if the non-masked
/// elements actually mask the insertelement buildvector, if any.
template <bool IsPoisonOnly = false>
static SmallBitVector isUndefVector(const Value *V,
const SmallBitVector &UseMask = {}) {
SmallBitVector Res(UseMask.empty() ? 1 : UseMask.size(), true);
using T = std::conditional_t<IsPoisonOnly, PoisonValue, UndefValue>;
if (isa<T>(V))
return Res;
auto *VecTy = dyn_cast<FixedVectorType>(V->getType());
if (!VecTy)
return Res.reset();
auto *C = dyn_cast<Constant>(V);
if (!C) {
if (!UseMask.empty()) {
const Value *Base = V;
while (auto *II = dyn_cast<InsertElementInst>(Base)) {
Base = II->getOperand(0);
if (isa<T>(II->getOperand(1)))
continue;
std::optional<unsigned> Idx = getElementIndex(II);
if (!Idx) {
Res.reset();
return Res;
}
if (*Idx < UseMask.size() && !UseMask.test(*Idx))
Res.reset(*Idx);
}
// TODO: Add analysis for shuffles here too.
if (V == Base) {
Res.reset();
} else {
SmallBitVector SubMask(UseMask.size(), false);
Res &= isUndefVector<IsPoisonOnly>(Base, SubMask);
}
} else {
Res.reset();
}
return Res;
}
for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
if (Constant *Elem = C->getAggregateElement(I))
if (!isa<T>(Elem) &&
(UseMask.empty() || (I < UseMask.size() && !UseMask.test(I))))
Res.reset(I);
}
return Res;
}
/// Checks if the vector of instructions can be represented as a shuffle, like:
/// %x0 = extractelement <4 x i8> %x, i32 0
/// %x3 = extractelement <4 x i8> %x, i32 3
/// %y1 = extractelement <4 x i8> %y, i32 1
/// %y2 = extractelement <4 x i8> %y, i32 2
/// %x0x0 = mul i8 %x0, %x0
/// %x3x3 = mul i8 %x3, %x3
/// %y1y1 = mul i8 %y1, %y1
/// %y2y2 = mul i8 %y2, %y2
/// %ins1 = insertelement <4 x i8> poison, i8 %x0x0, i32 0
/// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
/// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
/// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
/// ret <4 x i8> %ins4
/// can be transformed into:
/// %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 3, i32 5,
/// i32 6>
/// %2 = mul <4 x i8> %1, %1
/// ret <4 x i8> %2
/// Mask will return the Shuffle Mask equivalent to the extracted elements.
/// TODO: Can we split off and reuse the shuffle mask detection from
/// ShuffleVectorInst/getShuffleCost?
static std::optional<TargetTransformInfo::ShuffleKind>
isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
const auto *It = find_if(VL, IsaPred<ExtractElementInst>);
if (It == VL.end())
return std::nullopt;
unsigned Size =
std::accumulate(VL.begin(), VL.end(), 0u, [](unsigned S, Value *V) {
auto *EI = dyn_cast<ExtractElementInst>(V);
if (!EI)
return S;
auto *VTy = dyn_cast<FixedVectorType>(EI->getVectorOperandType());
if (!VTy)
return S;
return std::max(S, VTy->getNumElements());
});
Value *Vec1 = nullptr;
Value *Vec2 = nullptr;
bool HasNonUndefVec = any_of(VL, [](Value *V) {
auto *EE = dyn_cast<ExtractElementInst>(V);
if (!EE)
return false;
Value *Vec = EE->getVectorOperand();
if (isa<UndefValue>(Vec))
return false;
return isGuaranteedNotToBePoison(Vec);
});
enum ShuffleMode { Unknown, Select, Permute };
ShuffleMode CommonShuffleMode = Unknown;
Mask.assign(VL.size(), PoisonMaskElem);
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
// Undef can be represented as an undef element in a vector.
if (isa<UndefValue>(VL[I]))
continue;
auto *EI = cast<ExtractElementInst>(VL[I]);
if (isa<ScalableVectorType>(EI->getVectorOperandType()))
return std::nullopt;
auto *Vec = EI->getVectorOperand();
// We can extract an element from an undef or poison vector.
if (isUndefVector</*isPoisonOnly=*/true>(Vec).all())
continue;
// All vector operands must have the same number of vector elements.
if (isa<UndefValue>(Vec)) {
Mask[I] = I;
} else {
if (isa<UndefValue>(EI->getIndexOperand()))
continue;
auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
if (!Idx)
return std::nullopt;
// Undefined behavior if Idx is negative or >= Size.
if (Idx->getValue().uge(Size))
continue;
unsigned IntIdx = Idx->getValue().getZExtValue();
Mask[I] = IntIdx;
}
if (isUndefVector(Vec).all() && HasNonUndefVec)
continue;
// For correct shuffling we have to have at most 2 different vector operands
// in all extractelement instructions.
if (!Vec1 || Vec1 == Vec) {
Vec1 = Vec;
} else if (!Vec2 || Vec2 == Vec) {
Vec2 = Vec;
Mask[I] += Size;
} else {
return std::nullopt;
}
if (CommonShuffleMode == Permute)
continue;
// If the extract index is not the same as the operation number, it is a
// permutation.
if (Mask[I] % Size != I) {
CommonShuffleMode = Permute;
continue;
}
CommonShuffleMode = Select;
}
// If we're not crossing lanes in different vectors, consider it as blending.
if (CommonShuffleMode == Select && Vec2)
return TargetTransformInfo::SK_Select;
// If Vec2 was never used, we have a permutation of a single vector, otherwise
// we have a permutation of 2 vectors.
return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
: TargetTransformInfo::SK_PermuteSingleSrc;
}
/// \returns True if Extract{Value,Element} instruction extracts element Idx.
static std::optional<unsigned> getExtractIndex(Instruction *E) {
unsigned Opcode = E->getOpcode();
assert((Opcode == Instruction::ExtractElement ||
Opcode == Instruction::ExtractValue) &&
"Expected extractelement or extractvalue instruction.");
if (Opcode == Instruction::ExtractElement) {
auto *CI = dyn_cast<ConstantInt>(E->getOperand(1));
if (!CI)
return std::nullopt;
return CI->getZExtValue();
}
auto *EI = cast<ExtractValueInst>(E);
if (EI->getNumIndices() != 1)
return std::nullopt;
return *EI->idx_begin();
}
namespace {
/// Main data required for vectorization of instructions.
struct InstructionsState {
/// The very first instruction in the list with the main opcode.
Value *OpValue = nullptr;
/// The main/alternate instruction.
Instruction *MainOp = nullptr;
Instruction *AltOp = nullptr;
/// The main/alternate opcodes for the list of instructions.
unsigned getOpcode() const {
return MainOp ? MainOp->getOpcode() : 0;
}
unsigned getAltOpcode() const {
return AltOp ? AltOp->getOpcode() : 0;
}
/// Some of the instructions in the list have alternate opcodes.
bool isAltShuffle() const { return AltOp != MainOp; }
bool isOpcodeOrAlt(Instruction *I) const {
unsigned CheckedOpcode = I->getOpcode();
return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
}
InstructionsState() = delete;
InstructionsState(Value *OpValue, Instruction *MainOp, Instruction *AltOp)
: OpValue(OpValue), MainOp(MainOp), AltOp(AltOp) {}
};
} // end anonymous namespace
/// Chooses the correct key for scheduling data. If \p Op has the same (or
/// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is \p
/// OpValue.
static Value *isOneOf(const InstructionsState &S, Value *Op) {
auto *I = dyn_cast<Instruction>(Op);
if (I && S.isOpcodeOrAlt(I))
return Op;
return S.OpValue;
}
/// \returns true if \p Opcode is allowed as part of the main/alternate
/// instruction for SLP vectorization.
///
/// Example of unsupported opcode is SDIV that can potentially cause UB if the
/// "shuffled out" lane would result in division by zero.
static bool isValidForAlternation(unsigned Opcode) {
if (Instruction::isIntDivRem(Opcode))
return false;
return true;
}
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
const TargetLibraryInfo &TLI,
unsigned BaseIndex = 0);
/// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
/// compatible instructions or constants, or just some other regular values.
static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
Value *Op1, const TargetLibraryInfo &TLI) {
return (isConstant(BaseOp0) && isConstant(Op0)) ||
(isConstant(BaseOp1) && isConstant(Op1)) ||
(!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
!isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
BaseOp0 == Op0 || BaseOp1 == Op1 ||
getSameOpcode({BaseOp0, Op0}, TLI).getOpcode() ||
getSameOpcode({BaseOp1, Op1}, TLI).getOpcode();
}
/// \returns true if a compare instruction \p CI has a similar "look" and
/// the same predicate as \p BaseCI, "as is" or with its operands and
/// predicate swapped, false otherwise.
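/// For example, `icmp sgt i32 %a, %b` matches `icmp slt i32 %b, %a`, since
/// the latter is the former with both its operands and predicate swapped.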
static bool isCmpSameOrSwapped(const CmpInst *BaseCI, const CmpInst *CI,
const TargetLibraryInfo &TLI) {
assert(BaseCI->getOperand(0)->getType() == CI->getOperand(0)->getType() &&
"Assessing comparisons of different types?");
CmpInst::Predicate BasePred = BaseCI->getPredicate();
CmpInst::Predicate Pred = CI->getPredicate();
CmpInst::Predicate SwappedPred = CmpInst::getSwappedPredicate(Pred);
Value *BaseOp0 = BaseCI->getOperand(0);
Value *BaseOp1 = BaseCI->getOperand(1);
Value *Op0 = CI->getOperand(0);
Value *Op1 = CI->getOperand(1);
return (BasePred == Pred &&
areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1, TLI)) ||
(BasePred == SwappedPred &&
areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0, TLI));
}
/// \returns analysis of the Instructions in \p VL described in
/// InstructionsState, i.e. the opcode with which we suppose the whole list
/// could be vectorized, even if its structure is diverse.
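/// A minimal illustration:
/// \verbatim
/// %a = add i32 %x, %y
/// %b = add i32 %z, %w
/// %c = sub i32 %p, %q
/// \endverbatim
/// For VL = {%a, %b, %c} the returned state has MainOp = %a (Add) and
/// AltOp = %c (Sub), i.e. an alternate opcode sequence, while a list that
/// also contains a non-instruction value yields a state with a null MainOp.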
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
const TargetLibraryInfo &TLI,
unsigned BaseIndex) {
// Make sure these are all Instructions.
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]);
CmpInst::Predicate BasePred =
IsCmpOp ? cast<CmpInst>(VL[BaseIndex])->getPredicate()
: CmpInst::BAD_ICMP_PREDICATE;
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
unsigned AltOpcode = Opcode;
unsigned AltIndex = BaseIndex;
bool SwappedPredsCompatible = [&]() {
if (!IsCmpOp)
return false;
SetVector<unsigned> UniquePreds, UniqueNonSwappedPreds;
UniquePreds.insert(BasePred);
UniqueNonSwappedPreds.insert(BasePred);
for (Value *V : VL) {
auto *I = dyn_cast<CmpInst>(V);
if (!I)
return false;
CmpInst::Predicate CurrentPred = I->getPredicate();
CmpInst::Predicate SwappedCurrentPred =
CmpInst::getSwappedPredicate(CurrentPred);
UniqueNonSwappedPreds.insert(CurrentPred);
if (!UniquePreds.contains(CurrentPred) &&
!UniquePreds.contains(SwappedCurrentPred))
UniquePreds.insert(CurrentPred);
}
// The total number of predicates is greater than 2, but if we consider
// swapped predicates compatible only 2 remain; in that case treat the
// swappable predicates as compatible opcodes, not as alternate ones.
return UniqueNonSwappedPreds.size() > 2 && UniquePreds.size() == 2;
}();
// Check for one alternate opcode from another BinaryOperator.
// TODO - generalize to support all operators (types, calls etc.).
auto *IBase = cast<Instruction>(VL[BaseIndex]);
Intrinsic::ID BaseID = 0;
SmallVector<VFInfo> BaseMappings;
if (auto *CallBase = dyn_cast<CallInst>(IBase)) {
BaseID = getVectorIntrinsicIDForCall(CallBase, &TLI);
BaseMappings = VFDatabase(*CallBase).getMappings(*CallBase);
if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
}
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
auto *I = cast<Instruction>(VL[Cnt]);
unsigned InstOpcode = I->getOpcode();
if (IsBinOp && isa<BinaryOperator>(I)) {
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
isValidForAlternation(Opcode)) {
AltOpcode = InstOpcode;
AltIndex = Cnt;
continue;
}
} else if (IsCastOp && isa<CastInst>(I)) {
Value *Op0 = IBase->getOperand(0);
Type *Ty0 = Op0->getType();
Value *Op1 = I->getOperand(0);
Type *Ty1 = Op1->getType();
if (Ty0 == Ty1) {
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
if (Opcode == AltOpcode) {
assert(isValidForAlternation(Opcode) &&
isValidForAlternation(InstOpcode) &&
"Cast isn't safe for alternation, logic needs to be updated!");
AltOpcode = InstOpcode;
AltIndex = Cnt;
continue;
}
}
} else if (auto *Inst = dyn_cast<CmpInst>(VL[Cnt]); Inst && IsCmpOp) {
auto *BaseInst = cast<CmpInst>(VL[BaseIndex]);
Type *Ty0 = BaseInst->getOperand(0)->getType();
Type *Ty1 = Inst->getOperand(0)->getType();
if (Ty0 == Ty1) {
assert(InstOpcode == Opcode && "Expected same CmpInst opcode.");
// Check for compatible operands. If the corresponding operands are not
// compatible, we need to perform alternate vectorization.
CmpInst::Predicate CurrentPred = Inst->getPredicate();
CmpInst::Predicate SwappedCurrentPred =
CmpInst::getSwappedPredicate(CurrentPred);
if ((E == 2 || SwappedPredsCompatible) &&
(BasePred == CurrentPred || BasePred == SwappedCurrentPred))
continue;
if (isCmpSameOrSwapped(BaseInst, Inst, TLI))
continue;
auto *AltInst = cast<CmpInst>(VL[AltIndex]);
if (AltIndex != BaseIndex) {
if (isCmpSameOrSwapped(AltInst, Inst, TLI))
continue;
} else if (BasePred != CurrentPred) {
assert(
isValidForAlternation(InstOpcode) &&
"CmpInst isn't safe for alternation, logic needs to be updated!");
AltIndex = Cnt;
continue;
}
CmpInst::Predicate AltPred = AltInst->getPredicate();
if (BasePred == CurrentPred || BasePred == SwappedCurrentPred ||
AltPred == CurrentPred || AltPred == SwappedCurrentPred)
continue;
}
} else if (InstOpcode == Opcode || InstOpcode == AltOpcode) {
if (auto *Gep = dyn_cast<GetElementPtrInst>(I)) {
if (Gep->getNumOperands() != 2 ||
Gep->getOperand(0)->getType() != IBase->getOperand(0)->getType())
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
} else if (auto *EI = dyn_cast<ExtractElementInst>(I)) {
if (!isVectorLikeInstWithConstOps(EI))
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
} else if (auto *LI = dyn_cast<LoadInst>(I)) {
auto *BaseLI = cast<LoadInst>(IBase);
if (!LI->isSimple() || !BaseLI->isSimple())
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
} else if (auto *Call = dyn_cast<CallInst>(I)) {
auto *CallBase = cast<CallInst>(IBase);
if (Call->getCalledFunction() != CallBase->getCalledFunction())
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
if (Call->hasOperandBundles() && (!CallBase->hasOperandBundles() ||
!std::equal(Call->op_begin() + Call->getBundleOperandsStartIndex(),
Call->op_begin() + Call->getBundleOperandsEndIndex(),
CallBase->op_begin() +
CallBase->getBundleOperandsStartIndex())))
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(Call, &TLI);
if (ID != BaseID)
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
if (!ID) {
SmallVector<VFInfo> Mappings = VFDatabase(*Call).getMappings(*Call);
if (Mappings.size() != BaseMappings.size() ||
Mappings.front().ISA != BaseMappings.front().ISA ||
Mappings.front().ScalarName != BaseMappings.front().ScalarName ||
Mappings.front().VectorName != BaseMappings.front().VectorName ||
Mappings.front().Shape.VF != BaseMappings.front().Shape.VF ||
Mappings.front().Shape.Parameters !=
BaseMappings.front().Shape.Parameters)
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
}
}
continue;
}
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
}
return InstructionsState(VL[BaseIndex], cast<Instruction>(VL[BaseIndex]),
cast<Instruction>(VL[AltIndex]));
}
/// \returns true if all of the values in \p VL have the same type or false
/// otherwise.
static bool allSameType(ArrayRef<Value *> VL) {
Type *Ty = VL.front()->getType();
return all_of(VL.drop_front(), [&](Value *V) { return V->getType() == Ty; });
}
/// \returns True if an in-tree use also needs an extract. This refers to a
/// possible scalar operand in a vectorized instruction.
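/// For example, the pointer operand of a vectorized load stays scalar, so
/// if \p Scalar is the pointer operand of \p UserInst, the in-tree user
/// still needs an extract.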
static bool doesInTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
TargetLibraryInfo *TLI) {
unsigned Opcode = UserInst->getOpcode();
switch (Opcode) {
case Instruction::Load: {
LoadInst *LI = cast<LoadInst>(UserInst);
return (LI->getPointerOperand() == Scalar);
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(UserInst);
return (SI->getPointerOperand() == Scalar);
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(UserInst);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
return any_of(enumerate(CI->args()), [&](auto &&Arg) {
return isVectorIntrinsicWithScalarOpAtArg(ID, Arg.index()) &&
Arg.value().get() == Scalar;
});
}
default:
return false;
}
}
/// \returns the AA location that is being accessed by the instruction.
static MemoryLocation getLocation(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return MemoryLocation::get(SI);
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return MemoryLocation::get(LI);
return MemoryLocation();
}
/// \returns True if the instruction is not a volatile or atomic load/store.
static bool isSimple(Instruction *I) {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->isSimple();
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->isSimple();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
return !MI->isVolatile();
return true;
}
/// Shuffles \p Mask in accordance with the given \p SubMask.
/// \param ExtendingManyInputs Supports reshuffling of the mask with not
/// just one but two input vectors.
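/// As a minimal illustration of the common case: for Mask = {3, 2, 1, 0}
/// and SubMask = {1, 0, 3, 2}, the combined mask is
/// NewMask[I] = Mask[SubMask[I]] = {2, 3, 0, 1}.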
static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
bool ExtendingManyInputs = false) {
if (SubMask.empty())
return;
assert(
(!ExtendingManyInputs || SubMask.size() > Mask.size() ||
// Check if input scalars were extended to match the size of the other node.
(SubMask.size() == Mask.size() &&
std::all_of(std::next(Mask.begin(), Mask.size() / 2), Mask.end(),
[](int Idx) { return Idx == PoisonMaskElem; }))) &&
"SubMask with many inputs support must be larger than the mask.");
if (Mask.empty()) {
Mask.append(SubMask.begin(), SubMask.end());
return;
}
SmallVector<int> NewMask(SubMask.size(), PoisonMaskElem);
int TermValue = std::min(Mask.size(), SubMask.size());
for (int I = 0, E = SubMask.size(); I < E; ++I) {
if (SubMask[I] == PoisonMaskElem ||
(!ExtendingManyInputs &&
(SubMask[I] >= TermValue || Mask[SubMask[I]] >= TermValue)))
continue;
NewMask[I] = Mask[SubMask[I]];
}
Mask.swap(NewMask);
}
/// Order may have elements assigned a special value (size) which is out of
/// bounds. Such indices only appear in places which correspond to undef values
/// (see canReuseExtract for details) and are used to prevent undef values from
/// affecting the operand ordering.
/// The first loop below simply finds all unused indices and then the next loop
/// nest assigns these indices to the undef value positions.
/// In the example below, Order has two undef positions, which are assigned the
/// values 3 and 7 respectively:
/// before: 6 9 5 4 9 2 1 0
/// after: 6 3 5 4 7 2 1 0
static void fixupOrderingIndices(MutableArrayRef<unsigned> Order) {
const unsigned Sz = Order.size();
SmallBitVector UnusedIndices(Sz, /*t=*/true);
SmallBitVector MaskedIndices(Sz);
for (unsigned I = 0; I < Sz; ++I) {
if (Order[I] < Sz)
UnusedIndices.reset(Order[I]);
else
MaskedIndices.set(I);
}
if (MaskedIndices.none())
return;
assert(UnusedIndices.count() == MaskedIndices.count() &&
"Non-synced masked/available indices.");
int Idx = UnusedIndices.find_first();
int MIdx = MaskedIndices.find_first();
while (MIdx >= 0) {
assert(Idx >= 0 && "Indices must be synced.");
Order[MIdx] = Idx;
Idx = UnusedIndices.find_next(Idx);
MIdx = MaskedIndices.find_next(MIdx);
}
}
/// \returns a bitset for selecting opcodes. false for Opcode0 and true for
/// Opcode1.
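/// For example, for VL = {add, sub, add, sub} with Opcode0 = Add and
/// Opcode1 = Sub, the resulting mask is {0, 1, 0, 1}.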
SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, unsigned Opcode0,
unsigned Opcode1) {
SmallBitVector OpcodeMask(VL.size(), false);
for (unsigned Lane : seq<unsigned>(VL.size()))
if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
OpcodeMask.set(Lane);
return OpcodeMask;
}
namespace llvm {
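/// Builds the inverse of the permutation \p Indices, i.e. a mask such that
/// Mask[Indices[I]] = I for every position I. For example, Indices =
/// {2, 0, 1} produces Mask = {1, 2, 0}.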
static void inversePermutation(ArrayRef<unsigned> Indices,
SmallVectorImpl<int> &Mask) {
Mask.clear();
const unsigned E = Indices.size();
Mask.resize(E, PoisonMaskElem);
for (unsigned I = 0; I < E; ++I)
Mask[Indices[I]] = I;
}
/// Reorders the list of scalars in accordance with the given \p Mask.
static void reorderScalars(SmallVectorImpl<Value *> &Scalars,
ArrayRef<int> Mask) {
assert(!Mask.empty() && "Expected non-empty mask.");
SmallVector<Value *> Prev(Scalars.size(),
PoisonValue::get(Scalars.front()->getType()));
Prev.swap(Scalars);
for (unsigned I = 0, E = Prev.size(); I < E; ++I)
if (Mask[I] != PoisonMaskElem)
Scalars[Mask[I]] = Prev[I];
}
/// Checks if the provided value does not require scheduling. It does not
/// require scheduling if this is not an instruction, or it is an instruction
/// that does not read/write memory and all of its operands are either not
/// instructions, or are phi nodes or instructions from different blocks.
static bool areAllOperandsNonInsts(Value *V) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
return true;
return !mayHaveNonDefUseDependency(*I) &&
all_of(I->operands(), [I](Value *V) {
auto *IO = dyn_cast<Instruction>(V);
if (!IO)
return true;
return isa<PHINode>(IO) || IO->getParent() != I->getParent();
});
}
/// Checks if the provided value does not require scheduling. It does not
/// require scheduling if this is not an instruction, or it is an instruction
/// that does not read/write memory and all of its users are phi nodes or
/// instructions from other blocks.
static bool isUsedOutsideBlock(Value *V) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
return true;
// Limits the number of uses to save compile time.
return !I->mayReadOrWriteMemory() && !I->hasNUsesOrMore(UsesLimit) &&
all_of(I->users(), [I](User *U) {
auto *IU = dyn_cast<Instruction>(U);
if (!IU)
return true;
return IU->getParent() != I->getParent() || isa<PHINode>(IU);
});
}
/// Checks if the specified value does not require scheduling. It does not
/// require scheduling if all operands and all users do not need to be scheduled
/// in the current basic block.
static bool doesNotNeedToBeScheduled(Value *V) {
return areAllOperandsNonInsts(V) && isUsedOutsideBlock(V);
}
/// Checks if the specified array of instructions does not require scheduling.
/// This is the case if either all instructions have operands that do not
/// require scheduling, or all of their users do not require scheduling since
/// they are phis or in other basic blocks.
static bool doesNotNeedToSchedule(ArrayRef<Value *> VL) {
return !VL.empty() &&
(all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts));
}
namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
struct TreeEntry;
struct ScheduleData;
class ShuffleCostEstimator;
class ShuffleInstructionBuilder;
public:
/// Tracks the state in which we can represent the loads in the given sequence.
enum class LoadsState {
Gather,
Vectorize,
ScatterVectorize,
StridedVectorize
};
using ValueList = SmallVector<Value *, 8>;
using InstrList = SmallVector<Instruction *, 16>;
using ValueSet = SmallPtrSet<Value *, 16>;
using StoreList = SmallVector<StoreInst *, 8>;
using ExtraValueToDebugLocsMap =
MapVector<Value *, SmallVector<Instruction *, 2>>;
using OrdersType = SmallVector<unsigned, 4>;
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li,
DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
const DataLayout *DL, OptimizationRemarkEmitter *ORE)
: BatchAA(*Aa), F(Func), SE(Se), TTI(Tti), TLI(TLi), LI(Li), DT(Dt),
AC(AC), DB(DB), DL(DL), ORE(ORE),
Builder(Se->getContext(), TargetFolder(*DL)) {
CodeMetrics::collectEphemeralValues(F, AC, EphValues);
// Use the vector register size specified by the target unless overridden
// by a command-line option.
// TODO: It would be better to limit the vectorization factor based on
// data type rather than just register size. For example, x86 AVX has
// 256-bit registers, but it does not support integer operations
// at that width (that requires AVX2).
if (MaxVectorRegSizeOption.getNumOccurrences())
MaxVecRegSize = MaxVectorRegSizeOption;
else
MaxVecRegSize =
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
.getFixedValue();
if (MinVectorRegSizeOption.getNumOccurrences())
MinVecRegSize = MinVectorRegSizeOption;
else
MinVecRegSize = TTI->getMinVectorRegisterBitWidth();
}
/// Vectorize the tree that starts with the elements in \p VL.
/// Returns the vectorized root.
Value *vectorizeTree();
/// Vectorize the tree but with the list of externally used values \p
/// ExternallyUsedValues. Values in this MapVector can be replaced by the
/// generated extractelement instructions.
/// \param ReplacedExternals contains the list of replaced external values
/// {scalar, replacement} after emitting extractelement for external uses.
Value *
vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
Instruction *ReductionRoot = nullptr);
/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
InstructionCost getSpillCost() const;
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
InstructionCost getTreeCost(ArrayRef<Value *> VectorizedVals = std::nullopt);
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
/// the purpose of scheduling and extraction in the \p UserIgnoreLst.
void buildTree(ArrayRef<Value *> Roots,
const SmallDenseSet<Value *> &UserIgnoreLst);
/// Construct a vectorizable tree that starts at \p Roots.
void buildTree(ArrayRef<Value *> Roots);
/// Returns whether the root node has in-tree uses.
bool doesRootHaveInTreeUses() const {
return !VectorizableTree.empty() &&
!VectorizableTree.front()->UserTreeIndices.empty();
}
/// Return the scalars of the root node.
ArrayRef<Value *> getRootNodeScalars() const {
assert(!VectorizableTree.empty() && "No graph to get the first node from");
return VectorizableTree.front()->Scalars;
}
/// Checks if the root graph node can be emitted with narrower bitwidth at
/// codegen and returns its signedness, if so.
bool isSignedMinBitwidthRootNode() const {
return MinBWs.at(VectorizableTree.front().get()).second;
}
/// Builds external uses of the vectorized scalars, i.e. the list of
/// vectorized scalars to be extracted, their lanes and their scalar users.
/// \p ExternallyUsedValues contains an additional list of external uses to
/// handle vectorization of reductions.
void
buildExternalUses(const ExtraValueToDebugLocsMap &ExternallyUsedValues = {});
/// Transforms graph nodes to target specific representations, if profitable.
void transformNodes();
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
VectorizableTree.clear();
ScalarToTreeEntry.clear();
MultiNodeScalars.clear();
MustGather.clear();
NonScheduledFirst.clear();
EntryToLastInstruction.clear();
ExternalUses.clear();
ExternalUsesAsGEPs.clear();
for (auto &Iter : BlocksSchedules) {
BlockScheduling *BS = Iter.second.get();
BS->clear();
}
MinBWs.clear();
ReductionBitWidth = 0;
CastMaxMinBWSizes.reset();
ExtraBitWidthNodes.clear();
InstrElementSize.clear();
UserIgnoreList = nullptr;
PostponedGathers.clear();
ValueToGatherNodes.clear();
}
unsigned getTreeSize() const { return VectorizableTree.size(); }
/// Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
/// Checks if the specified gather tree entry \p TE can be represented as a
/// shuffled vector entry + (possibly) permutation with other gathers. It
/// implements the checks only for possibly ordered scalars (Loads,
/// ExtractElement, ExtractValue), which can be part of the graph.
std::optional<OrdersType> findReusedOrderedScalars(const TreeEntry &TE);
/// Sort loads into increasing pointer offsets to allow greater clustering.
std::optional<OrdersType> findPartiallyOrderedLoads(const TreeEntry &TE);
/// Gets reordering data for the given tree entry. If the entry is
/// vectorized, just return ReorderIndices; otherwise check if the scalars
/// can be reordered and return the most optimal order.
/// \return std::nullopt if ordering is not important, empty order, if
/// identity order is important, or the actual order.
/// \param TopToBottom If true, include the order of vectorized stores and
/// insertelement nodes, otherwise skip them.
std::optional<OrdersType> getReorderingData(const TreeEntry &TE,
bool TopToBottom);
/// Reorders the current graph to the most profitable order starting from the
/// root node to the leaf nodes. The best order is chosen only from the nodes
/// of the same size (vectorization factor). Smaller nodes are considered
/// parts of a subgraph with a smaller VF and are reordered independently. We
/// can do this because we still need to extend smaller nodes to the wider VF
/// and we can merge reordering shuffles with the widening shuffles.
void reorderTopToBottom();
/// Reorders the current graph to the most profitable order starting from
/// leaves to the root. It allows rotating small subgraphs and reduces the
/// number of reshuffles if the leaf nodes use the same order. In this case we
/// can merge the orders and just shuffle the user node instead of shuffling
/// its operands. Plus, even if the leaf nodes have different orders, it
/// allows sinking the reordering in the graph closer to the root node and
/// merging it later during analysis.
void reorderBottomToTop(bool IgnoreReorder = false);
/// \return The vector element size in bits to use when vectorizing the
/// expression tree ending at \p V. If V is a store, the size is the width of
/// the stored value. Otherwise, the size is the width of the largest loaded
/// value reaching V. This method is used by the vectorizer to calculate
/// vectorization factors.
unsigned getVectorElementSize(Value *V);
/// Compute the minimum type sizes required to represent the entries in a
/// vectorizable tree.
void computeMinimumValueSizes();
/// \returns the maximum vector register size as set by TTI or overridden by cl::opt.
unsigned getMaxVecRegSize() const {
return MaxVecRegSize;
}
/// \returns the minimum vector register size as set by cl::opt.
unsigned getMinVecRegSize() const {
return MinVecRegSize;
}
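/// \returns the minimum vectorization factor for scalars of \p Sz bits;
/// e.g. a 128-bit minimum vector register and 32-bit scalars give a
/// minimum VF of 4.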
unsigned getMinVF(unsigned Sz) const {
return std::max(2U, getMinVecRegSize() / Sz);
}
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
unsigned MaxVF = MaxVFOption.getNumOccurrences() ?
MaxVFOption : TTI->getMaximumVF(ElemWidth, Opcode);
return MaxVF ? MaxVF : UINT_MAX;
}
/// Check if homogeneous aggregate is isomorphic to some VectorType.
/// Accepts homogeneous multidimensional aggregate of scalars/vectors like
/// {[4 x i16], [4 x i16]}, { <2 x float>, <2 x float> },
/// {{{i16, i16}, {i16, i16}}, {{i16, i16}, {i16, i16}}} and so on.
///
/// \returns number of elements in vector if isomorphism exists, 0 otherwise.
unsigned canMapToVector(Type *T) const;
/// \returns True if the VectorizableTree is both tiny and not fully
/// vectorizable. We do not vectorize such trees.
bool isTreeTinyAndNotFullyVectorizable(bool ForReduction = false) const;
/// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values
/// can be load combined in the backend. Load combining may not be allowed in
/// the IR optimizer, so we do not want to alter the pattern. For example,
/// partially transforming a scalar bswap() pattern into vector code is
/// effectively impossible for the backend to undo.
/// TODO: If load combining is allowed in the IR optimizer, this analysis
/// may not be necessary.
bool isLoadCombineReductionCandidate(RecurKind RdxKind) const;
/// Assume that a vector of stores of bitwise-or/shifted/zexted loaded values
/// can be load combined in the backend. Load combining may not be allowed in
/// the IR optimizer, so we do not want to alter the pattern. For example,
/// partially transforming a scalar bswap() pattern into vector code is
/// effectively impossible for the backend to undo.
/// TODO: If load combining is allowed in the IR optimizer, this analysis
/// may not be necessary.
bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
/// Checks if the given array of loads can be represented as a vectorized
/// load, a scatter, or just a simple gather.
/// \param VL list of loads.
/// \param VL0 main load value.
/// \param Order returned order of load instructions.
/// \param PointerOps returned list of pointer operands.
/// \param TryRecursiveCheck used to check if a long masked gather can be
/// represented as a series of loads/insert-subvector operations, if
/// profitable.
LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps,
bool TryRecursiveCheck = true) const;
OptimizationRemarkEmitter *getORE() { return ORE; }
/// This structure holds any data we need about the edges being traversed
/// during buildTree_rec(). We keep track of:
/// (i) the user TreeEntry index, and
/// (ii) the index of the edge.
struct EdgeInfo {
EdgeInfo() = default;
EdgeInfo(TreeEntry *UserTE, unsigned EdgeIdx)
: UserTE(UserTE), EdgeIdx(EdgeIdx) {}
/// The user TreeEntry.
TreeEntry *UserTE = nullptr;
/// The operand index of the use.
unsigned EdgeIdx = UINT_MAX;
#ifndef NDEBUG
friend inline raw_ostream &operator<<(raw_ostream &OS,
const BoUpSLP::EdgeInfo &EI) {
EI.dump(OS);
return OS;
}
/// Debug print.
void dump(raw_ostream &OS) const {
OS << "{User:" << (UserTE ? std::to_string(UserTE->Idx) : "null")
<< " EdgeIdx:" << EdgeIdx << "}";
}
LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
#endif
bool operator == (const EdgeInfo &Other) const {
return UserTE == Other.UserTE && EdgeIdx == Other.EdgeIdx;
}
};
/// A helper class used for scoring candidates for two consecutive lanes.
class LookAheadHeuristics {
const TargetLibraryInfo &TLI;
const DataLayout &DL;
ScalarEvolution &SE;
const BoUpSLP &R;
int NumLanes; // Total number of lanes (aka vectorization factor).
int MaxLevel; // The maximum recursion depth for accumulating score.
public:
LookAheadHeuristics(const TargetLibraryInfo &TLI, const DataLayout &DL,
ScalarEvolution &SE, const BoUpSLP &R, int NumLanes,
int MaxLevel)
: TLI(TLI), DL(DL), SE(SE), R(R), NumLanes(NumLanes),
MaxLevel(MaxLevel) {}
// The hard-coded scores listed here are not very important, though they
// should be higher for better matches to improve the resulting cost. When
// computing the scores of matching one sub-tree with another, we are
// basically counting the number of values that are matching. So even if all
// scores were set to 1, we would still get a decent matching result.
// However, sometimes we have to break ties. For example, we may have to
// choose between matching loads vs matching opcodes. This is what these
// scores help us with: they provide the order of preference. Also,
// this is important if the scalar is externally used or used in another
// tree entry node in a different lane.
/// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]).
static const int ScoreConsecutiveLoads = 4;
/// The same load multiple times. This should have a better score than
/// `ScoreSplat` because on x86, for a 2-lane vector, we can represent it
/// with `movddup (%reg), xmm0`, which has a throughput of 0.5 versus 0.5 for
/// a vector load and 1.0 for a broadcast.
static const int ScoreSplatLoads = 3;
/// Loads from reversed memory addresses, e.g. load(A[i+1]), load(A[i]).
static const int ScoreReversedLoads = 3;
/// A load candidate for masked gather.
static const int ScoreMaskedGatherCandidate = 1;
/// ExtractElementInst from same vector and consecutive indexes.
static const int ScoreConsecutiveExtracts = 4;
/// ExtractElementInst from same vector and reversed indices.
static const int ScoreReversedExtracts = 3;
/// Constants.
static const int ScoreConstants = 2;
/// Instructions with the same opcode.
static const int ScoreSameOpcode = 2;
/// Instructions with alt opcodes (e.g, add + sub).
static const int ScoreAltOpcodes = 1;
/// Identical instructions (a.k.a. splat or broadcast).
static const int ScoreSplat = 1;
/// Matching with an undef is preferable to failing.
static const int ScoreUndef = 1;
/// Score for failing to find a decent match.
static const int ScoreFail = 0;
/// Score if all users are vectorized.
static const int ScoreAllUserVectorized = 1;
/// \returns the score of placing \p V1 and \p V2 in consecutive lanes.
/// \p U1 and \p U2 are the users of \p V1 and \p V2.
/// Also, checks if \p V1 and \p V2 are compatible with instructions in \p
/// MainAltOps.
int getShallowScore(Value *V1, Value *V2, Instruction *U1, Instruction *U2,
ArrayRef<Value *> MainAltOps) const {
if (!isValidElementType(V1->getType()) ||
!isValidElementType(V2->getType()))
return LookAheadHeuristics::ScoreFail;
if (V1 == V2) {
if (isa<LoadInst>(V1)) {
// Returns true if the users of V1 and V2 won't need to be extracted.
auto AllUsersAreInternal = [U1, U2, this](Value *V1, Value *V2) {
// Bail out if we have too many uses to save compilation time.
if (V1->hasNUsesOrMore(UsesLimit) || V2->hasNUsesOrMore(UsesLimit))
return false;
auto AllUsersVectorized = [U1, U2, this](Value *V) {
return llvm::all_of(V->users(), [U1, U2, this](Value *U) {
return U == U1 || U == U2 || R.getTreeEntry(U) != nullptr;
});
};
return AllUsersVectorized(V1) && AllUsersVectorized(V2);
};
// A broadcast of a load can be cheaper on some targets.
if (R.TTI->isLegalBroadcastLoad(V1->getType(),
ElementCount::getFixed(NumLanes)) &&
((int)V1->getNumUses() == NumLanes ||
AllUsersAreInternal(V1, V2)))
return LookAheadHeuristics::ScoreSplatLoads;
}
return LookAheadHeuristics::ScoreSplat;
}
auto CheckSameEntryOrFail = [&]() {
if (const TreeEntry *TE1 = R.getTreeEntry(V1);
TE1 && TE1 == R.getTreeEntry(V2))
return LookAheadHeuristics::ScoreSplatLoads;
return LookAheadHeuristics::ScoreFail;
};
auto *LI1 = dyn_cast<LoadInst>(V1);
auto *LI2 = dyn_cast<LoadInst>(V2);
if (LI1 && LI2) {
if (LI1->getParent() != LI2->getParent() || !LI1->isSimple() ||
!LI2->isSimple())
return CheckSameEntryOrFail();
std::optional<int> Dist = getPointersDiff(
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
if (!Dist || *Dist == 0) {
if (getUnderlyingObject(LI1->getPointerOperand()) ==
getUnderlyingObject(LI2->getPointerOperand()) &&
R.TTI->isLegalMaskedGather(
getWidenedType(LI1->getType(), NumLanes), LI1->getAlign()))
return LookAheadHeuristics::ScoreMaskedGatherCandidate;
return CheckSameEntryOrFail();
}
// The distance is too large - still may be profitable to use masked
// loads/gathers.
if (std::abs(*Dist) > NumLanes / 2)
return LookAheadHeuristics::ScoreMaskedGatherCandidate;
// This will still detect consecutive loads, but we might have "holes"
// in some cases. It is ok for non-power-of-2 vectorization and may produce
// better results. It should not affect current vectorization.
return (*Dist > 0) ? LookAheadHeuristics::ScoreConsecutiveLoads
: LookAheadHeuristics::ScoreReversedLoads;
}
auto *C1 = dyn_cast<Constant>(V1);
auto *C2 = dyn_cast<Constant>(V2);
if (C1 && C2)
return LookAheadHeuristics::ScoreConstants;
// Extracts from consecutive indices of the same vector score better, as
// the extracts could be optimized away.
Value *EV1;
ConstantInt *Ex1Idx;
if (match(V1, m_ExtractElt(m_Value(EV1), m_ConstantInt(Ex1Idx)))) {
// Undefs are always profitable for extractelements.
// Compiler can easily combine poison and extractelement <non-poison> or
// undef and extractelement <poison>. But combining undef +
// extractelement <non-poison-but-may-produce-poison> requires some
// extra operations.
if (isa<UndefValue>(V2))
return (isa<PoisonValue>(V2) || isUndefVector(EV1).all())
? LookAheadHeuristics::ScoreConsecutiveExtracts
: LookAheadHeuristics::ScoreSameOpcode;
Value *EV2 = nullptr;
ConstantInt *Ex2Idx = nullptr;
if (match(V2,
m_ExtractElt(m_Value(EV2), m_CombineOr(m_ConstantInt(Ex2Idx),
m_Undef())))) {
// Undefs are always profitable for extractelements.
if (!Ex2Idx)
return LookAheadHeuristics::ScoreConsecutiveExtracts;
if (isUndefVector(EV2).all() && EV2->getType() == EV1->getType())
return LookAheadHeuristics::ScoreConsecutiveExtracts;
if (EV2 == EV1) {
int Idx1 = Ex1Idx->getZExtValue();
int Idx2 = Ex2Idx->getZExtValue();
int Dist = Idx2 - Idx1;
// The distance is too large - still may be profitable to use
// shuffles.
if (std::abs(Dist) == 0)
return LookAheadHeuristics::ScoreSplat;
if (std::abs(Dist) > NumLanes / 2)
return LookAheadHeuristics::ScoreSameOpcode;
return (Dist > 0) ? LookAheadHeuristics::ScoreConsecutiveExtracts
: LookAheadHeuristics::ScoreReversedExtracts;
}
return LookAheadHeuristics::ScoreAltOpcodes;
}
return CheckSameEntryOrFail();
}
auto *I1 = dyn_cast<Instruction>(V1);
auto *I2 = dyn_cast<Instruction>(V2);
if (I1 && I2) {
if (I1->getParent() != I2->getParent())
return CheckSameEntryOrFail();
SmallVector<Value *, 4> Ops(MainAltOps.begin(), MainAltOps.end());
Ops.push_back(I1);
Ops.push_back(I2);
InstructionsState S = getSameOpcode(Ops, TLI);
// Note: Only consider instructions with <= 2 operands to avoid
// complexity explosion.
if (S.getOpcode() &&
(S.MainOp->getNumOperands() <= 2 || !MainAltOps.empty() ||
!S.isAltShuffle()) &&
all_of(Ops, [&S](Value *V) {
return cast<Instruction>(V)->getNumOperands() ==
S.MainOp->getNumOperands();
}))
return S.isAltShuffle() ? LookAheadHeuristics::ScoreAltOpcodes
: LookAheadHeuristics::ScoreSameOpcode;
}
if (isa<UndefValue>(V2))
return LookAheadHeuristics::ScoreUndef;
return CheckSameEntryOrFail();
}
/// Go through the operands of \p LHS and \p RHS recursively until
/// MaxLevel, and return the cumulative score. \p U1 and \p U2 are
/// the users of \p LHS and \p RHS (that is, \p LHS and \p RHS are operands
/// of \p U1 and \p U2), except at the beginning of the recursion where
/// these are set to nullptr.
///
/// For example:
/// \verbatim
/// A[0] B[0] A[1] B[1] C[0] D[0] B[1] A[1]
/// \ / \ / \ / \ /
/// + + + +
/// G1 G2 G3 G4
/// \endverbatim
/// The getScoreAtLevelRec(G1, G2) function will try to match the nodes at
/// each level recursively, accumulating the score. It starts from matching
/// the additions at level 0, then moves on to the loads (level 1). The
/// score of G1 and G2 is higher than G1 and G3, because {A[0],A[1]} and
/// {B[0],B[1]} match with LookAheadHeuristics::ScoreConsecutiveLoads, while
/// {A[0],C[0]} has a score of LookAheadHeuristics::ScoreFail.
/// Please note that the order of the operands does not matter, as we
/// evaluate the score of all profitable combinations of operands. In
/// other words the score of G1 and G4 is the same as G1 and G2. This
/// heuristic is based on ideas described in:
/// Look-ahead SLP: Auto-vectorization in the presence of commutative
/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
/// Luís F. W. Góes
int getScoreAtLevelRec(Value *LHS, Value *RHS, Instruction *U1,
Instruction *U2, int CurrLevel,
ArrayRef<Value *> MainAltOps) const {
// Get the shallow score of V1 and V2.
int ShallowScoreAtThisLevel =
getShallowScore(LHS, RHS, U1, U2, MainAltOps);
// If we reached MaxLevel,
// or if V1 and V2 are not instructions,
// or if they are SPLAT,
// or if they are not consecutive,
// or if it is profitable to vectorize loads or extractelements, return
// the current cost early.
auto *I1 = dyn_cast<Instruction>(LHS);
auto *I2 = dyn_cast<Instruction>(RHS);
if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 ||
ShallowScoreAtThisLevel == LookAheadHeuristics::ScoreFail ||
(((isa<LoadInst>(I1) && isa<LoadInst>(I2)) ||
(I1->getNumOperands() > 2 && I2->getNumOperands() > 2) ||
(isa<ExtractElementInst>(I1) && isa<ExtractElementInst>(I2))) &&
ShallowScoreAtThisLevel))
return ShallowScoreAtThisLevel;
assert(I1 && I2 && "Should have early exited.");
// Contains the I2 operand indexes that got matched with I1 operands.
SmallSet<unsigned, 4> Op2Used;
// Recursion towards the operands of I1 and I2. We are trying all possible
// operand pairs, and keeping track of the best score.
for (unsigned OpIdx1 = 0, NumOperands1 = I1->getNumOperands();
OpIdx1 != NumOperands1; ++OpIdx1) {
// Try to pair the operand at OpIdx1 with the best operand of I2.
int MaxTmpScore = 0;
unsigned MaxOpIdx2 = 0;
bool FoundBest = false;
// If I2 is commutative try all combinations.
unsigned FromIdx = isCommutative(I2) ? 0 : OpIdx1;
unsigned ToIdx = isCommutative(I2)
? I2->getNumOperands()
: std::min(I2->getNumOperands(), OpIdx1 + 1);
assert(FromIdx <= ToIdx && "Bad index");
for (unsigned OpIdx2 = FromIdx; OpIdx2 != ToIdx; ++OpIdx2) {
// Skip operands already paired with OpIdx1.
if (Op2Used.count(OpIdx2))
continue;
// Recursively calculate the cost at each level
int TmpScore =
getScoreAtLevelRec(I1->getOperand(OpIdx1), I2->getOperand(OpIdx2),
I1, I2, CurrLevel + 1, std::nullopt);
// Look for the best score.
if (TmpScore > LookAheadHeuristics::ScoreFail &&
TmpScore > MaxTmpScore) {
MaxTmpScore = TmpScore;
MaxOpIdx2 = OpIdx2;
FoundBest = true;
}
}
if (FoundBest) {
// Pair {OpIdx1, MaxOpIdx2} was found to be best. Never revisit it.
Op2Used.insert(MaxOpIdx2);
ShallowScoreAtThisLevel += MaxTmpScore;
}
}
return ShallowScoreAtThisLevel;
}
};
/// A helper data structure to hold the operands of a vector of instructions.
/// This supports a fixed vector length for all operand vectors.
class VLOperands {
/// For each operand we need (i) the value, and (ii) the opcode that it
/// would be attached to if the expression was in a left-linearized form.
/// This is required to avoid illegal operand reordering.
/// For example:
/// \verbatim
/// 0 Op1
/// |/
/// Op1 Op2 Linearized + Op2
/// \ / ----------> |/
/// - -
///
/// Op1 - Op2 (0 + Op1) - Op2
/// \endverbatim
///
/// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
///
/// Another way to think of this is to track all the operations across the
/// path from the operand all the way to the root of the tree and to
/// calculate the operation that corresponds to this path. For example, the
/// path from Op2 to the root crosses the RHS of the '-', therefore the
/// corresponding operation is a '-' (which matches the one in the
/// linearized tree, as shown above).
///
/// For lack of a better term, we refer to this operation as Accumulated
/// Path Operation (APO).
struct OperandData {
OperandData() = default;
OperandData(Value *V, bool APO, bool IsUsed)
: V(V), APO(APO), IsUsed(IsUsed) {}
/// The operand value.
Value *V = nullptr;
/// TreeEntries only allow a single opcode, or an alternate sequence of
/// them (e.g., +, -). Therefore, we can safely use a boolean value for the
/// APO. It is set to 'true' if 'V' is attached to an inverse operation
/// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
/// (e.g., Add/Mul)
bool APO = false;
/// Helper data for the reordering function.
bool IsUsed = false;
};
/// During operand reordering, we are trying to select the operand at a lane
/// that best matches the operand at the neighboring lane. Our
/// selection is based on the type of value we are looking for. For example,
/// if the neighboring lane has a load, we need to look for a load that is
/// accessing a consecutive address. These strategies are summarized in the
/// 'ReorderingMode' enumerator.
enum class ReorderingMode {
Load, ///< Matching loads to consecutive memory addresses
Opcode, ///< Matching instructions based on opcode (same or alternate)
Constant, ///< Matching constants
Splat, ///< Matching the same instruction multiple times (broadcast)
Failed, ///< We failed to create a vectorizable group
};
using OperandDataVec = SmallVector<OperandData, 2>;
/// A vector of operand vectors.
SmallVector<OperandDataVec, 4> OpsVec;
const TargetLibraryInfo &TLI;
const DataLayout &DL;
ScalarEvolution &SE;
const BoUpSLP &R;
const Loop *L = nullptr;
/// \returns the operand data at \p OpIdx and \p Lane.
OperandData &getData(unsigned OpIdx, unsigned Lane) {
return OpsVec[OpIdx][Lane];
}
/// \returns the operand data at \p OpIdx and \p Lane. Const version.
const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
return OpsVec[OpIdx][Lane];
}
/// Clears the used flag for all entries.
void clearUsed() {
for (unsigned OpIdx = 0, NumOperands = getNumOperands();
OpIdx != NumOperands; ++OpIdx)
for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
++Lane)
OpsVec[OpIdx][Lane].IsUsed = false;
}
/// Swap the operand at \p OpIdx1 with the one at \p OpIdx2.
void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
}
/// \param Lane lane of the operands under analysis.
/// \param OpIdx operand index in lane \p Lane for which we're looking for
/// the best candidate.
/// \param Idx operand index of the current candidate value.
/// \returns The additional score due to possible broadcasting of the
/// elements in the lane. It is more profitable to have a power-of-2 number
/// of unique elements in the lane, since it will be vectorized with higher
/// probability after removing duplicates. Currently the SLP vectorizer
/// supports only vectorization of a power-of-2 number of unique scalars.
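/// For instance, if keeping the current operand would leave 5 unique values
/// in the lane (padded to a vector of 8) while the candidate would leave 4
/// (an exact power of 2), the score is (8 - 5) - (4 - 4) = 3 in favor of
/// the candidate.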
int getSplatScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const {
Value *IdxLaneV = getData(Idx, Lane).V;
if (!isa<Instruction>(IdxLaneV) || IdxLaneV == getData(OpIdx, Lane).V)
return 0;
SmallPtrSet<Value *, 4> Uniques;
for (unsigned Ln = 0, E = getNumLanes(); Ln < E; ++Ln) {
if (Ln == Lane)
continue;
Value *OpIdxLnV = getData(OpIdx, Ln).V;
if (!isa<Instruction>(OpIdxLnV))
return 0;
Uniques.insert(OpIdxLnV);
}
int UniquesCount = Uniques.size();
int UniquesCntWithIdxLaneV =
Uniques.contains(IdxLaneV) ? UniquesCount : UniquesCount + 1;
Value *OpIdxLaneV = getData(OpIdx, Lane).V;
int UniquesCntWithOpIdxLaneV =
Uniques.contains(OpIdxLaneV) ? UniquesCount : UniquesCount + 1;
if (UniquesCntWithIdxLaneV == UniquesCntWithOpIdxLaneV)
return 0;
return (PowerOf2Ceil(UniquesCntWithOpIdxLaneV) -
UniquesCntWithOpIdxLaneV) -
(PowerOf2Ceil(UniquesCntWithIdxLaneV) - UniquesCntWithIdxLaneV);
}
/// \param Lane lane of the operands under analysis.
/// \param OpIdx operand index in lane \p Lane for which we're looking for
/// the best candidate.
/// \param Idx operand index of the current candidate value.
/// \returns The additional score for the scalar which users are all
/// vectorized.
int getExternalUseScore(unsigned Lane, unsigned OpIdx, unsigned Idx) const {
Value *IdxLaneV = getData(Idx, Lane).V;
Value *OpIdxLaneV = getData(OpIdx, Lane).V;
// Do not care about the number of uses for vector-like instructions
// (extractelement/extractvalue with constant indices); they are extracts
// themselves and are already externally used. Vectorization of such
// instructions does not add an extra extractelement instruction, it may
// just remove one.
if (isVectorLikeInstWithConstOps(IdxLaneV) &&
isVectorLikeInstWithConstOps(OpIdxLaneV))
return LookAheadHeuristics::ScoreAllUserVectorized;
auto *IdxLaneI = dyn_cast<Instruction>(IdxLaneV);
if (!IdxLaneI || !isa<Instruction>(OpIdxLaneV))
return 0;
return R.areAllUsersVectorized(IdxLaneI)
? LookAheadHeuristics::ScoreAllUserVectorized
: 0;
}
/// Score scaling factor for fully compatible instructions but with a
/// different number of external uses. Allows better selection of
/// instructions with fewer external uses.
static const int ScoreScaleFactor = 10;
/// \Returns the look-ahead score, which tells us how much the sub-trees
/// rooted at \p LHS and \p RHS match; the more they match, the higher the
/// score. This helps break ties in an informed way when we cannot decide on
/// the order of the operands by just considering the immediate
/// predecessors.
int getLookAheadScore(Value *LHS, Value *RHS, ArrayRef<Value *> MainAltOps,
int Lane, unsigned OpIdx, unsigned Idx,
bool &IsUsed) {
LookAheadHeuristics LookAhead(TLI, DL, SE, R, getNumLanes(),
LookAheadMaxDepth);
// Keep track of the instruction stack as we recurse into the operands
// during the look-ahead score exploration.
int Score =
LookAhead.getScoreAtLevelRec(LHS, RHS, /*U1=*/nullptr, /*U2=*/nullptr,
/*CurrLevel=*/1, MainAltOps);
if (Score) {
int SplatScore = getSplatScore(Lane, OpIdx, Idx);
if (Score <= -SplatScore) {
// Set the minimum score for splat-like sequence to avoid setting
// failed state.
Score = 1;
} else {
Score += SplatScore;
// Scale the score to see the difference between different operands
// and similar operands, but with all-vectorized/not-all-vectorized
// uses. It does not affect the actual selection of the best
// compatible operand in general, it just allows selecting the
// operand with all vectorized uses.
Score *= ScoreScaleFactor;
Score += getExternalUseScore(Lane, OpIdx, Idx);
IsUsed = true;
}
}
return Score;
}
/// Best defined scores per lane between the passes. Used to choose the
/// best operand (with the highest score) between the passes.
/// The key is {Operand Index, Lane}.
/// The value is the best score between the passes for the lane and the
/// operand.
SmallDenseMap<std::pair<unsigned, unsigned>, unsigned, 8>
BestScoresPerLanes;
// Search all operands in Ops[*][Lane] for the one that best matches
// Ops[OpIdx][LastLane] and return its operand index.
// If no good match can be found, return std::nullopt.
std::optional<unsigned>
getBestOperand(unsigned OpIdx, int Lane, int LastLane,
ArrayRef<ReorderingMode> ReorderingModes,
ArrayRef<Value *> MainAltOps) {
unsigned NumOperands = getNumOperands();
// The operand of the previous lane at OpIdx.
Value *OpLastLane = getData(OpIdx, LastLane).V;
// Our strategy mode for OpIdx.
ReorderingMode RMode = ReorderingModes[OpIdx];
if (RMode == ReorderingMode::Failed)
return std::nullopt;
// The linearized opcode of the operand at OpIdx, Lane.
bool OpIdxAPO = getData(OpIdx, Lane).APO;
// The best operand index and its score.
// Sometimes we have more than one option (e.g., Opcode and Undefs), so we
// are using the score to differentiate between the two.
struct BestOpData {
std::optional<unsigned> Idx;
unsigned Score = 0;
} BestOp;
BestOp.Score =
BestScoresPerLanes.try_emplace(std::make_pair(OpIdx, Lane), 0)
.first->second;
// Track if the operand must be marked as used. If the operand is set to
// Score 1 explicitly (because of a non-power-of-2 number of unique
// scalars), we may want to re-estimate the operands again on the
// following iterations.
bool IsUsed = RMode == ReorderingMode::Splat ||
RMode == ReorderingMode::Constant ||
RMode == ReorderingMode::Load;
// Iterate through all unused operands and look for the best.
for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
// Get the operand at Idx and Lane.
OperandData &OpData = getData(Idx, Lane);
Value *Op = OpData.V;
bool OpAPO = OpData.APO;
// Skip already selected operands.
if (OpData.IsUsed)
continue;
// Skip if we are trying to move the operand to a position with a
// different opcode in the linearized tree form. This would break the
// semantics.
if (OpAPO != OpIdxAPO)
continue;
// Look for an operand that matches the current mode.
switch (RMode) {
case ReorderingMode::Load:
case ReorderingMode::Opcode: {
bool LeftToRight = Lane > LastLane;
Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
Value *OpRight = (LeftToRight) ? Op : OpLastLane;
int Score = getLookAheadScore(OpLeft, OpRight, MainAltOps, Lane,
OpIdx, Idx, IsUsed);
if (Score > static_cast<int>(BestOp.Score) ||
(Score > 0 && Score == static_cast<int>(BestOp.Score) &&
Idx == OpIdx)) {
BestOp.Idx = Idx;
BestOp.Score = Score;
BestScoresPerLanes[std::make_pair(OpIdx, Lane)] = Score;
}
break;
}
case ReorderingMode::Constant:
if (isa<Constant>(Op) ||
(!BestOp.Score && L && L->isLoopInvariant(Op))) {
BestOp.Idx = Idx;
if (isa<Constant>(Op)) {
BestOp.Score = LookAheadHeuristics::ScoreConstants;
BestScoresPerLanes[std::make_pair(OpIdx, Lane)] =
LookAheadHeuristics::ScoreConstants;
}
if (isa<UndefValue>(Op) || !isa<Constant>(Op))
IsUsed = false;
}
break;
case ReorderingMode::Splat:
if (Op == OpLastLane || (!BestOp.Score && isa<Constant>(Op))) {
IsUsed = Op == OpLastLane;
if (Op == OpLastLane) {
BestOp.Score = LookAheadHeuristics::ScoreSplat;
BestScoresPerLanes[std::make_pair(OpIdx, Lane)] =
LookAheadHeuristics::ScoreSplat;
}
BestOp.Idx = Idx;
}
break;
case ReorderingMode::Failed:
llvm_unreachable("Not expected Failed reordering mode.");
}
}
if (BestOp.Idx) {
getData(*BestOp.Idx, Lane).IsUsed = IsUsed;
return BestOp.Idx;
}
// If we could not find a good match return std::nullopt.
return std::nullopt;
}
/// Helper for reorder().
/// \returns the lane that we should start reordering from. This is the one
/// which has the fewest operands that can freely move about, or that is
/// less profitable because it already has the most optimal set of operands.
unsigned getBestLaneToStartReordering() const {
unsigned Min = UINT_MAX;
unsigned SameOpNumber = 0;
// std::pair<unsigned, unsigned> is used to implement a simple voting
// algorithm and choose the lane with the fewest operands that can freely
// move about, or that is less profitable because it already has the
// most optimal set of operands. The first unsigned is the counter for
// voting, the second unsigned is the index of the lane.
MapVector<unsigned, std::pair<unsigned, unsigned>> HashMap;
// Try to be closer to the original results, if we have multiple lanes
// with the same cost. If 2 lanes have the same cost, use the one with
// the lowest index.
for (int I = getNumLanes(); I > 0; --I) {
unsigned Lane = I - 1;
OperandsOrderData NumFreeOpsHash =
getMaxNumOperandsThatCanBeReordered(Lane);
// Compare the number of operands that can move and choose the one with
// the least number.
if (NumFreeOpsHash.NumOfAPOs < Min) {
Min = NumFreeOpsHash.NumOfAPOs;
SameOpNumber = NumFreeOpsHash.NumOpsWithSameOpcodeParent;
HashMap.clear();
HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane);
} else if (NumFreeOpsHash.NumOfAPOs == Min &&
NumFreeOpsHash.NumOpsWithSameOpcodeParent < SameOpNumber) {
// Select the most optimal lane in terms of number of operands that
// should be moved around.
SameOpNumber = NumFreeOpsHash.NumOpsWithSameOpcodeParent;
HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane);
} else if (NumFreeOpsHash.NumOfAPOs == Min &&
NumFreeOpsHash.NumOpsWithSameOpcodeParent == SameOpNumber) {
auto *It = HashMap.find(NumFreeOpsHash.Hash);
if (It == HashMap.end())
HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane);
else
++It->second.first;
}
}
// Select the lane with the minimum counter.
unsigned BestLane = 0;
unsigned CntMin = UINT_MAX;
for (const auto &Data : reverse(HashMap)) {
if (Data.second.first < CntMin) {
CntMin = Data.second.first;
BestLane = Data.second.second;
}
}
return BestLane;
}
/// Data structure that helps to reorder operands.
struct OperandsOrderData {
/// The best number of operands with the same APOs, which can be
/// reordered.
unsigned NumOfAPOs = UINT_MAX;
/// Number of operands with the same/alternate instruction opcode and
/// parent.
unsigned NumOpsWithSameOpcodeParent = 0;
/// Hash for the actual operand ordering.
/// Used to count operands, actually their position id and opcode
/// value. It is used in the voting mechanism to find the lane with the
/// fewest operands that can freely move about, or that is less profitable
/// because it already has the most optimal set of operands. It could be
/// replaced with a SmallVector<unsigned>, but a hash code is faster
/// and requires less memory.
unsigned Hash = 0;
};
/// \returns the maximum number of operands that are allowed to be reordered
/// for \p Lane and the number of compatible instructions (with the same
/// parent/opcode). This is used as a heuristic for selecting the first lane
/// to start operand reordering.
OperandsOrderData getMaxNumOperandsThatCanBeReordered(unsigned Lane) const {
unsigned CntTrue = 0;
unsigned NumOperands = getNumOperands();
// Operands with the same APO can be reordered. We therefore need to count
// how many of them we have for each APO, like this: Cnt[APO] = x.
// Since we only have two APOs, namely true and false, we can avoid using
// a map. Instead we can simply count the number of operands that
// correspond to one of them (in this case the 'true' APO), and calculate
// the other by subtracting it from the total number of operands.
// Operands with the same instruction opcode and parent are more
// profitable since we don't need to move them in many cases; with high
// probability such a lane can already be vectorized effectively.
bool AllUndefs = true;
unsigned NumOpsWithSameOpcodeParent = 0;
Instruction *OpcodeI = nullptr;
BasicBlock *Parent = nullptr;
unsigned Hash = 0;
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
const OperandData &OpData = getData(OpIdx, Lane);
if (OpData.APO)
++CntTrue;
// Use Boyer-Moore majority voting for finding the majority opcode and
// the number of times it occurs.
if (auto *I = dyn_cast<Instruction>(OpData.V)) {
if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).getOpcode() ||
I->getParent() != Parent) {
if (NumOpsWithSameOpcodeParent == 0) {
NumOpsWithSameOpcodeParent = 1;
OpcodeI = I;
Parent = I->getParent();
} else {
--NumOpsWithSameOpcodeParent;
}
} else {
++NumOpsWithSameOpcodeParent;
}
}
Hash = hash_combine(
Hash, hash_value((OpIdx + 1) * (OpData.V->getValueID() + 1)));
AllUndefs = AllUndefs && isa<UndefValue>(OpData.V);
}
if (AllUndefs)
return {};
OperandsOrderData Data;
Data.NumOfAPOs = std::max(CntTrue, NumOperands - CntTrue);
Data.NumOpsWithSameOpcodeParent = NumOpsWithSameOpcodeParent;
Data.Hash = Hash;
return Data;
}
/// Go through the instructions in VL and append their operands.
void appendOperandsOfVL(ArrayRef<Value *> VL) {
assert(!VL.empty() && "Bad VL");
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
assert(isa<Instruction>(VL[0]) && "Expected instruction");
unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
constexpr unsigned IntrinsicNumOperands = 2;
if (isa<IntrinsicInst>(VL[0]))
NumOperands = IntrinsicNumOperands;
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
OpsVec[OpIdx].resize(NumLanes);
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
// RHS operand. The LHS operand of both add and sub is never attached
// to an inverse operation in the linearized form, therefore its APO
// is false. The RHS APO is true only if VL[Lane] is an inverse operation.
// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely
// tell the inverse operations by checking commutativity.
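// For example, for `%d = sub i32 %a, %b`, operand %a (OpIdx 0) gets
// APO = false and %b (OpIdx 1) gets APO = true, while for an `add` both
// operands get APO = false because add is commutative.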
bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
APO, false};
}
}
}
/// \returns the number of operands.
unsigned getNumOperands() const { return OpsVec.size(); }
/// \returns the number of lanes.
unsigned getNumLanes() const { return OpsVec[0].size(); }
/// \returns the operand value at \p OpIdx and \p Lane.
Value *getValue(unsigned OpIdx, unsigned Lane) const {
return getData(OpIdx, Lane).V;
}
/// \returns true if the data structure is empty.
bool empty() const { return OpsVec.empty(); }
/// Clears the data.
void clear() { OpsVec.clear(); }
/// \Returns true if there are enough operands identical to \p Op to fill
/// the whole vector (possibly mixed with constants or loop-invariant
/// values).
/// Note: This modifies the 'IsUsed' flag, so a clearUsed() must follow.
bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
bool OpAPO = getData(OpIdx, Lane).APO;
bool IsInvariant = L && L->isLoopInvariant(Op);
unsigned Cnt = 0;
for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
if (Ln == Lane)
continue;
// This is set to true if we found a candidate for broadcast at Lane.
bool FoundCandidate = false;
for (unsigned OpI = 0, OpE = getNumOperands(); OpI != OpE; ++OpI) {
OperandData &Data = getData(OpI, Ln);
if (Data.APO != OpAPO || Data.IsUsed)
continue;
Value *OpILane = getValue(OpI, Lane);
bool IsConstantOp = isa<Constant>(OpILane);
// Consider the broadcast candidate if:
// 1. Same value is found in one of the operands.
if (Data.V == Op ||
// 2. The operand in the given lane is not constant but there is a
// constant operand in another lane (which can be moved to the
// given lane). In this case we can represent it as a simple
// permutation of constant and broadcast.
(!IsConstantOp &&
((Lns > 2 && isa<Constant>(Data.V)) ||
// 2.1. If we have only 2 lanes, we need to check that the value in
// the next lane does not build the same opcode sequence.
(Lns == 2 &&
!getSameOpcode({Op, getValue((OpI + 1) % OpE, Ln)}, TLI)
.getOpcode() &&
isa<Constant>(Data.V)))) ||
// 3. The operand in the current lane is loop invariant (can be
// hoisted out) and another operand is also a loop invariant
// (though not a constant). In this case the whole vector can be
// hoisted out.
// FIXME: need to teach the cost model about this case for better
// estimation.
(IsInvariant && !isa<Constant>(Data.V) &&
!getSameOpcode({Op, Data.V}, TLI).getOpcode() &&
L->isLoopInvariant(Data.V))) {
FoundCandidate = true;
Data.IsUsed = Data.V == Op;
if (Data.V == Op)
++Cnt;
break;
}
}
if (!FoundCandidate)
return false;
}
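// With exactly 2 lanes a single match is enough; otherwise the exact
// value Op must have been matched in at least two other lanes.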
return getNumLanes() == 2 || Cnt > 1;
}
/// Checks if there is at least one operand in lanes other than \p Lane
/// that is compatible with the operand \p Op.
bool canBeVectorized(Instruction *Op, unsigned OpIdx, unsigned Lane) const {
bool OpAPO = getData(OpIdx, Lane).APO;
for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
if (Ln == Lane)
continue;
if (any_of(seq<unsigned>(getNumOperands()), [&](unsigned OpI) {
const OperandData &Data = getData(OpI, Ln);
if (Data.APO != OpAPO || Data.IsUsed)
return true;
Value *OpILn = getValue(OpI, Ln);
return (L && L->isLoopInvariant(OpILn)) ||
(getSameOpcode({Op, OpILn}, TLI).getOpcode() &&
Op->getParent() == cast<Instruction>(OpILn)->getParent());
}))
return true;
}
return false;
}
public:
/// Initialize with all the operands of the instruction vector \p RootVL.
VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
L(R.LI->getLoopFor(
(cast<Instruction>(RootVL.front())->getParent()))) {
// Append all the operands of RootVL.
appendOperandsOfVL(RootVL);
}
/// \returns a value vector with the operands across all lanes for the
/// operand at \p OpIdx.
ValueList getVL(unsigned OpIdx) const {
ValueList OpVL(OpsVec[OpIdx].size());
assert(OpsVec[OpIdx].size() == getNumLanes() &&
"Expected same num of lanes across all operands");
for (unsigned Lane = 0, Lanes = getNumLanes(); Lane != Lanes; ++Lane)
OpVL[Lane] = OpsVec[OpIdx][Lane].V;
return OpVL;
}
// Performs operand reordering for 2 or more operands.
// The original operands are in OpsVec[OpIdx][Lane] and the reordered
// operands are written back into OpsVec in place.
void reorder() {
unsigned NumOperands = getNumOperands();
unsigned NumLanes = getNumLanes();
// Each operand has its own mode. We are using this mode to help us select
// the instructions for each lane, so that they match best with the ones
// we have selected so far.
SmallVector<ReorderingMode, 2> ReorderingModes(NumOperands);
// This is a greedy single-pass algorithm. We are going over each lane
// once and deciding on the best order right away with no back-tracking.
// However, in order to increase its effectiveness, we start with the lane
// that has operands that can move the least. For example, given the
// following lanes:
// Lane 0 : A[0] = B[0] + C[0] // Visited 3rd
// Lane 1 : A[1] = C[1] - B[1] // Visited 1st
// Lane 2 : A[2] = B[2] + C[2] // Visited 2nd
// Lane 3 : A[3] = C[3] - B[3] // Visited 4th
// we will start at Lane 1, since the operands of the subtraction cannot
// be reordered. Then we will visit the rest of the lanes in a circular
// fashion. That is, Lane 2, then Lane 0, and finally Lane 3.
// Find the first lane that we will start our search from.
unsigned FirstLane = getBestLaneToStartReordering();
// Initialize the modes.
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
Value *OpLane0 = getValue(OpIdx, FirstLane);
// Keep track if we have instructions with all the same opcode on one
// side.
if (isa<LoadInst>(OpLane0))
ReorderingModes[OpIdx] = ReorderingMode::Load;
else if (auto *OpILane0 = dyn_cast<Instruction>(OpLane0)) {
// Check if OpLane0 should be broadcast.
if (shouldBroadcast(OpLane0, OpIdx, FirstLane) ||
!canBeVectorized(OpILane0, OpIdx, FirstLane))
ReorderingModes[OpIdx] = ReorderingMode::Splat;
else
ReorderingModes[OpIdx] = ReorderingMode::Opcode;
} else if (isa<Constant>(OpLane0))
ReorderingModes[OpIdx] = ReorderingMode::Constant;
else if (isa<Argument>(OpLane0))
// Our best hope is a Splat. It may save some cost in some cases.
ReorderingModes[OpIdx] = ReorderingMode::Splat;
else
// NOTE: This should be unreachable.
ReorderingModes[OpIdx] = ReorderingMode::Failed;
}
// Check that we don't have the same operands. There is no need to reorder
// if the operands are just a perfect or shuffled diamond match. Do not
// skip reordering for possible broadcasts or for a non-power-of-2 number
// of scalars (just for now).
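// The lambda below detects that case: it returns true only when every
// other operand list draws from the values already seen in operand 0 and
// the number of unique values is a power of 2 other than 2.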
auto &&SkipReordering = [this]() {
SmallPtrSet<Value *, 4> UniqueValues;
ArrayRef<OperandData> Op0 = OpsVec.front();
for (const OperandData &Data : Op0)
UniqueValues.insert(Data.V);
for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
return !UniqueValues.contains(Data.V);
}))
return false;
}
// TODO: Check if we can remove a check for non-power-2 number of
// scalars after full support of non-power-2 vectorization.
return UniqueValues.size() != 2 && isPowerOf2_32(UniqueValues.size());
};
// If the initial strategy fails for any of the operand indexes, then we
// perform reordering again in a second pass. This helps avoid assigning
// high priority to the failed strategy, and should improve reordering for
// the non-failed operand indexes.
for (int Pass = 0; Pass != 2; ++Pass) {
// Check if there is no need to reorder the operands because they are a
// perfect or shuffled diamond match.
// This is needed to avoid extra external-use cost counting for shuffled
// matches, which may cause regressions.
if (SkipReordering())
break;
// Skip the second pass if the first pass did not fail.
bool StrategyFailed = false;
// Mark all operand data as free to use.
clearUsed();
// We keep the original operand order for the FirstLane, so reorder the
// rest of the lanes. We are visiting the nodes in a circular fashion,
// using FirstLane as the center point and increasing the radius
// distance.
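// Track the main (and, once discovered, the alternate) instruction per
// operand index, seeded from FirstLane; used below for opcode matching.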
SmallVector<SmallVector<Value *, 2>> MainAltOps(NumOperands);
for (unsigned I = 0; I < NumOperands; ++I)
MainAltOps[I].push_back(getData(I, FirstLane).V);
for (unsigned Distance = 1; Distance != NumLanes; ++Distance) {
// Visit the lane on the right and then the lane on the left.
for (int Direction : {+1, -1}) {
int Lane = FirstLane + Direction * Distance;
if (Lane < 0 || Lane >= (int)NumLanes)
continue;
int LastLane = Lane - Direction;
assert(LastLane >= 0 && LastLane < (int)NumLanes &&
"Out of bounds");
// Look for a good match for each operand.
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
// Search for the operand that matches SortedOps[OpIdx][Lane-1].
std::optional<unsigned> BestIdx = getBestOperand(
OpIdx, Lane, LastLane, ReorderingModes, MainAltOps[OpIdx]);
// By not selecting a value, we allow the operands that follow to
// select a better matching value. We will get a non-null value in
// the next run of getBestOperand().
if (BestIdx) {
// Swap the current operand with the one returned by
// getBestOperand().
swap(OpIdx, *BestIdx, Lane);
} else {
// Enable the second pass.
StrategyFailed = true;
}
// Try to get the alternate opcode and follow it during analysis.
if (MainAltOps[OpIdx].size() != 2) {
OperandData &AltOp = getData(OpIdx, Lane);
InstructionsState OpS =
getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V}, TLI);
if (OpS.getOpcode() && OpS.isAltShuffle())
MainAltOps[OpIdx].push_back(AltOp.V);
}
}
}
}
// Skip second pass if the strategy did not fail.
if (!StrategyFailed)
break;
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD static StringRef getModeStr(ReorderingMode RMode) {
switch (RMode) {
case ReorderingMode::Load:
return "Load";
case ReorderingMode::Opcode:
return "Opcode";
case ReorderingMode::Constant:
return "Constant";
case ReorderingMode::Splat:
return "Splat";
case ReorderingMode::Failed:
return "Failed";
}
llvm_unreachable("Unimplemented Reordering Type");
}
LLVM_DUMP_METHOD static raw_ostream &printMode(ReorderingMode RMode,
raw_ostream &OS) {
return OS << getModeStr(RMode);
}
/// Debug print.
LLVM_DUMP_METHOD static void dumpMode(ReorderingMode RMode) {
printMode(RMode, dbgs());
}
friend raw_ostream &operator<<(raw_ostream &OS, ReorderingMode RMode) {
return printMode(RMode, OS);
}
LLVM_DUMP_METHOD raw_ostream &print(raw_ostream &OS) const {
const unsigned Indent = 2;
unsigned Cnt = 0;
for (const OperandDataVec &OpDataVec : OpsVec) {
OS << "Operand " << Cnt++ << "\n";
for (const OperandData &OpData : OpDataVec) {
OS.indent(Indent) << "{";
if (Value *V = OpData.V)
OS << *V;
else
OS << "null";
OS << ", APO:" << OpData.APO << "}\n";
}
OS << "\n";
}
return OS;
}
/// Debug print.
LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif
};
/// Evaluate each pair in \p Candidates and return the index into
/// \p Candidates of the pair with the highest score, deemed to have the best
/// chance to form the root of a profitable tree to vectorize. Return
/// std::nullopt if no candidate scored above LookAheadHeuristics::ScoreFail.
/// \param Limit Lower limit of the cost, considered to be a good enough
/// score.
std::optional<int>
findBestRootPair(ArrayRef<std::pair<Value *, Value *>> Candidates,
int Limit = LookAheadHeuristics::ScoreFail) const {
LookAheadHeuristics LookAhead(*TLI, *DL, *SE, *this, /*NumLanes=*/2,
RootLookAheadMaxDepth);
int BestScore = Limit;
std::optional<int> Index;
for (int I : seq<int>(0, Candidates.size())) {
int Score = LookAhead.getScoreAtLevelRec(Candidates[I].first,
Candidates[I].second,
/*U1=*/nullptr, /*U2=*/nullptr,
/*Level=*/1, std::nullopt);
if (Score > BestScore) {
BestScore = Score;
Index = I;
}
}
return Index;
}
/// Checks if the instruction is marked for deletion.
bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); }
/// Removes an instruction from its block and eventually deletes it.
/// It's like Instruction::eraseFromParent() except that the actual deletion
/// is delayed until BoUpSLP is destructed.
void eraseInstruction(Instruction *I) {
DeletedInstructions.insert(I);
}
/// Remove instructions from the parent function and clear the operands of \p
/// DeadVals instructions, marking for deletion trivially dead operands.
template <typename T>
void removeInstructionsAndOperands(ArrayRef<T *> DeadVals) {
SmallVector<WeakTrackingVH> DeadInsts;
for (T *V : DeadVals) {
auto *I = cast<Instruction>(V);
DeletedInstructions.insert(I);
}
DenseSet<Value *> Processed;
for (T *V : DeadVals) {
if (!V || !Processed.insert(V).second)
continue;
auto *I = cast<Instruction>(V);
salvageDebugInfo(*I);
SmallVector<const TreeEntry *> Entries;
if (const TreeEntry *Entry = getTreeEntry(I)) {
Entries.push_back(Entry);
auto It = MultiNodeScalars.find(I);
if (It != MultiNodeScalars.end())
Entries.append(It->second.begin(), It->second.end());
}
for (Use &U : I->operands()) {
if (auto *OpI = dyn_cast_if_present<Instruction>(U.get());
OpI && !DeletedInstructions.contains(OpI) && OpI->hasOneUser() &&
wouldInstructionBeTriviallyDead(OpI, TLI) &&
(Entries.empty() || none_of(Entries, [&](const TreeEntry *Entry) {
return Entry->VectorizedValue == OpI;
})))
DeadInsts.push_back(OpI);
}
I->dropAllReferences();
}
for (T *V : DeadVals) {
auto *I = cast<Instruction>(V);
if (!I->getParent())
continue;
assert((I->use_empty() || all_of(I->uses(),
[&](Use &U) {
return isDeleted(
cast<Instruction>(U.getUser()));
})) &&
"trying to erase instruction with users.");
I->removeFromParent();
SE->forgetValue(I);
}
// Process the dead instruction list until empty.
while (!DeadInsts.empty()) {
Value *V = DeadInsts.pop_back_val();
Instruction *VI = cast_or_null<Instruction>(V);
if (!VI || !VI->getParent())
continue;
assert(isInstructionTriviallyDead(VI, TLI) &&
"Live instruction found in dead worklist!");
assert(VI->use_empty() && "Instructions with uses are not dead.");
// Don't lose the debug info while deleting the instructions.
salvageDebugInfo(*VI);
// Null out all of the instruction's operands to see if any operand
// becomes dead as we go.
for (Use &OpU : VI->operands()) {
Value *OpV = OpU.get();
if (!OpV)
continue;
OpU.set(nullptr);
if (!OpV->use_empty())
continue;
// If the operand is an instruction that became dead as we nulled out
// the operand, and if it is 'trivially' dead, delete it in a future
// loop iteration.
if (auto *OpI = dyn_cast<Instruction>(OpV))
if (!DeletedInstructions.contains(OpI) &&
isInstructionTriviallyDead(OpI, TLI))
DeadInsts.push_back(OpI);
}
VI->removeFromParent();
DeletedInstructions.insert(VI);
SE->forgetValue(VI);
}
}
/// Checks if the instruction was already analyzed as a possible reduction
/// root.
bool isAnalyzedReductionRoot(Instruction *I) const {
return AnalyzedReductionsRoots.count(I);
}
/// Register the given instruction as already analyzed as a possible
/// reduction root.
void analyzedReductionRoot(Instruction *I) {
AnalyzedReductionsRoots.insert(I);
}
/// Checks if the provided list of reduced values was already checked for
/// vectorization.
bool areAnalyzedReductionVals(ArrayRef<Value *> VL) const {
return AnalyzedReductionVals.contains(hash_value(VL));
}
/// Adds the list of reduced values to the list of values already checked
/// for vectorization.
void analyzedReductionVals(ArrayRef<Value *> VL) {
AnalyzedReductionVals.insert(hash_value(VL));
}
/// Clear the list of the analyzed reduction root instructions.
void clearReductionData() {
AnalyzedReductionsRoots.clear();
AnalyzedReductionVals.clear();
AnalyzedMinBWVals.clear();
}
/// Checks if any of the given values is gathered in one of the nodes.
bool isAnyGathered(const SmallDenseSet<Value *> &Vals) const {
return any_of(MustGather, [&](Value *V) { return Vals.contains(V); });
}
/// Checks if the given value is gathered in one of the nodes.
bool isGathered(const Value *V) const {
return MustGather.contains(V);
}
/// Checks if the specified value was not scheduled.
bool isNotScheduled(const Value *V) const {
return NonScheduledFirst.contains(V);
}
/// Check if the value is vectorized in the tree.
bool isVectorized(Value *V) const { return getTreeEntry(V); }
~BoUpSLP();
private:
/// Determine if a node \p E can be demoted to a smaller type with a
/// truncation. We collect the entries that will be demoted in ToDemote.
/// \param E Node for analysis
/// \param ToDemote indices of the nodes to be demoted.
bool collectValuesToDemote(const TreeEntry &E, bool IsProfitableToDemoteRoot,
unsigned &BitWidth,
SmallVectorImpl<unsigned> &ToDemote,
DenseSet<const TreeEntry *> &Visited,
unsigned &MaxDepthLevel,
bool &IsProfitableToDemote,
bool IsTruncRoot) const;
/// Check if the operands on the edges \p Edges of the \p UserTE allow
/// reordering (i.e. the operands can be reordered because they have only one
/// user and are reorderable).
/// \param ReorderableGathers List of all gather nodes that require reordering
/// (e.g., gather of extractelements or partially vectorizable loads).
/// \param GatherOps List of gather operand nodes for \p UserTE that require
/// reordering, subset of \p NonVectorized.
bool
canReorderOperands(TreeEntry *UserTE,
SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
ArrayRef<TreeEntry *> ReorderableGathers,
SmallVectorImpl<TreeEntry *> &GatherOps);
/// Checks if the given \p TE is a gather node with clustered reused scalars
/// and reorders it per given \p Mask.
void reorderNodeWithReuses(TreeEntry &TE, ArrayRef<int> Mask) const;
/// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
/// if any. If it is not vectorized (gather node), returns nullptr.
TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
TreeEntry *TE = nullptr;
const auto *It = find_if(VL, [&](Value *V) {
TE = getTreeEntry(V);
if (TE && is_contained(TE->UserTreeIndices, EdgeInfo(UserTE, OpIdx)))
return true;
auto It = MultiNodeScalars.find(V);
if (It != MultiNodeScalars.end()) {
for (TreeEntry *E : It->second) {
if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) {
TE = E;
return true;
}
}
}
return false;
});
if (It != VL.end()) {
assert(TE->isSame(VL) && "Expected same scalars.");
return TE;
}
return nullptr;
}
/// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
/// if any. If it is not vectorized (gather node), returns nullptr.
const TreeEntry *getVectorizedOperand(const TreeEntry *UserTE,
unsigned OpIdx) const {
return const_cast<BoUpSLP *>(this)->getVectorizedOperand(
const_cast<TreeEntry *>(UserTE), OpIdx);
}
/// Checks if all users of \p I are part of the vectorization tree.
bool areAllUsersVectorized(
Instruction *I,
const SmallDenseSet<Value *> *VectorizedVals = nullptr) const;
/// Return information about the vector formed for the specified index
/// of a vector of (the same) instruction.
TargetTransformInfo::OperandValueInfo getOperandInfo(ArrayRef<Value *> Ops);
/// \returns the graph entry for the \p Idx operand of the \p E entry.
const TreeEntry *getOperandEntry(const TreeEntry *E, unsigned Idx) const;
/// \returns Cast context for the given graph node.
TargetTransformInfo::CastContextHint
getCastContextHint(const TreeEntry &TE) const;
/// \returns the cost of the vectorizable entry.
InstructionCost getEntryCost(const TreeEntry *E,
ArrayRef<Value *> VectorizedVals,
SmallPtrSetImpl<Value *> &CheckedExtracts);
/// This is the recursive part of buildTree.
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
const EdgeInfo &EI);
/// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
/// be vectorized to use the original vector (or aggregate "bitcast" to a
/// vector) and sets \p CurrentOrder to the identity permutation; otherwise
/// returns false, setting \p CurrentOrder to either an empty vector or a
/// non-identity permutation that allows reuse of the extract instructions.
/// \param ResizeAllowed indicates whether it is allowed to handle subvector
/// extract order.
bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
SmallVectorImpl<unsigned> &CurrentOrder,
bool ResizeAllowed = false) const;
/// Vectorize a single entry in the tree.
/// \param PostponedPHIs true if the emission of phi nodes needs to be
/// postponed to avoid issues with def-use order.
Value *vectorizeTree(TreeEntry *E, bool PostponedPHIs);
/// Vectorize a single entry in the tree, the \p Idx-th operand of the entry
/// \p E.
/// \param PostponedPHIs true if the emission of phi nodes needs to be
/// postponed to avoid issues with def-use order.
Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx, bool PostponedPHIs);
/// Create a new vector from a list of scalar values. Produces a sequence
/// which exploits values reused across lanes, and arranges the inserts
/// for ease of later optimization.
template <typename BVTy, typename ResTy, typename... Args>
ResTy processBuildVector(const TreeEntry *E, Type *ScalarTy, Args &...Params);
/// Create a new vector from a list of scalar values. Produces a sequence
/// which exploits values reused across lanes, and arranges the inserts
/// for ease of later optimization.
Value *createBuildVector(const TreeEntry *E, Type *ScalarTy);
/// Returns the instruction in the bundle, which can be used as a base point
/// for scheduling. Usually it is the last instruction in the bundle, except
/// for the case when all operands are external (in this case, it is the first
/// instruction in the list).
Instruction &getLastInstructionInBundle(const TreeEntry *E);
/// Tries to find extractelement instructions with constant indices from a
/// fixed vector type and gathers such instructions into a bunch, which is
/// highly likely to be detected as a shuffle of 1 or 2 input vectors. If this
/// attempt was successful, the matched scalars are replaced by poison values
/// in \p VL for future analysis.
std::optional<TargetTransformInfo::ShuffleKind>
tryToGatherSingleRegisterExtractElements(MutableArrayRef<Value *> VL,
SmallVectorImpl<int> &Mask) const;
/// Tries to find extractelement instructions with constant indices from a
/// fixed vector type and gathers such instructions into a bunch, which is
/// highly likely to be detected as a shuffle of 1 or 2 input vectors. If this
/// attempt was successful, the matched scalars are replaced by poison values
/// in \p VL for future analysis.
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>>
tryToGatherExtractElements(SmallVectorImpl<Value *> &VL,
SmallVectorImpl<int> &Mask,
unsigned NumParts) const;
/// Checks if the gathered \p VL can be represented as a single register
/// shuffle(s) of previous tree entries.
/// \param TE Tree entry checked for permutation.
/// \param VL List of scalars (a subset of the TE scalars), checked for
/// permutations. Must form a single-register vector.
/// \param ForOrder Tries to fetch the best candidates for ordering info. Also
/// directs building the mask from the original vector values, without
/// relying on the potential reordering.
/// \returns ShuffleKind, if gathered values can be represented as shuffles of
/// previous tree entries. \p Part of \p Mask is filled with the shuffle mask.
std::optional<TargetTransformInfo::ShuffleKind>
isGatherShuffledSingleRegisterEntry(
const TreeEntry *TE, ArrayRef<Value *> VL, MutableArrayRef<int> Mask,
SmallVectorImpl<const TreeEntry *> &Entries, unsigned Part,
bool ForOrder);
/// Checks if the gathered \p VL can be represented as multi-register
/// shuffle(s) of previous tree entries.
/// \param TE Tree entry checked for permutation.
/// \param VL List of scalars (a subset of the TE scalars), checked for
/// permutations.
/// \param ForOrder Tries to fetch the best candidates for ordering info. Also
/// directs building the mask from the original vector values, without
/// relying on the potential reordering.
/// \returns per-register series of ShuffleKind, if gathered values can be
/// represented as shuffles of previous tree entries. \p Mask is filled with
/// the shuffle mask (also on per-register base).
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>>
isGatherShuffledEntry(
const TreeEntry *TE, ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask,
SmallVectorImpl<SmallVector<const TreeEntry *>> &Entries,
unsigned NumParts, bool ForOrder = false);
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
/// \param ForPoisonSrc true if initial vector is poison, false otherwise.
InstructionCost getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
Type *ScalarTy) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
void setInsertPointAfterBundle(const TreeEntry *E);
/// \returns a vector from a collection of scalars in \p VL. If \p Root is not
/// specified, the starting vector value is poison.
Value *gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy);
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even if the tree height is tiny.
bool isFullyVectorizableTinyTree(bool ForReduction) const;
/// Reorder commutative or alt operands to get a better probability of
/// generating vectorized code.
static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right,
const BoUpSLP &R);
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
/// users of \p TE and collects the stores. It returns the map from the store
/// pointers to the collected stores.
DenseMap<Value *, SmallVector<StoreInst *>>
collectUserStores(const BoUpSLP::TreeEntry *TE) const;
/// Helper for `findExternalStoreUsersReorderIndices()`. It checks if the
/// stores in \p StoresVec can form a vector instruction. If so it returns
/// true and populates \p ReorderIndices with the shuffle indices of the
/// stores when compared to the sorted vector.
bool canFormVector(ArrayRef<StoreInst *> StoresVec,
OrdersType &ReorderIndices) const;
/// Iterates through the users of \p TE, looking for scalar stores that can be
/// potentially vectorized in a future SLP-tree. If found, it keeps track of
/// their order and builds an order index vector for each store bundle. It
/// returns all these order vectors found.
/// We run this after the tree has formed, otherwise we may come across user
/// instructions that are not yet in the tree.
SmallVector<OrdersType, 1>
findExternalStoreUsersReorderIndices(TreeEntry *TE) const;
struct TreeEntry {
using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
TreeEntry(VecTreeTy &Container) : Container(Container) {}
/// \returns Common mask for reorder indices and reused scalars.
SmallVector<int> getCommonMask() const {
SmallVector<int> Mask;
inversePermutation(ReorderIndices, Mask);
::addMask(Mask, ReuseShuffleIndices);
return Mask;
}
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
auto &&IsSame = [VL](ArrayRef<Value *> Scalars, ArrayRef<int> Mask) {
if (Mask.size() != VL.size() && VL.size() == Scalars.size())
return std::equal(VL.begin(), VL.end(), Scalars.begin());
return VL.size() == Mask.size() &&
std::equal(VL.begin(), VL.end(), Mask.begin(),
[Scalars](Value *V, int Idx) {
return (isa<UndefValue>(V) &&
Idx == PoisonMaskElem) ||
(Idx != PoisonMaskElem && V == Scalars[Idx]);
});
};
if (!ReorderIndices.empty()) {
// TODO: implement matching if the nodes are just reordered, still can
// treat the vector as the same if the list of scalars matches VL
// directly, without reordering.
SmallVector<int> Mask;
inversePermutation(ReorderIndices, Mask);
if (VL.size() == Scalars.size())
return IsSame(Scalars, Mask);
if (VL.size() == ReuseShuffleIndices.size()) {
::addMask(Mask, ReuseShuffleIndices);
return IsSame(Scalars, Mask);
}
return false;
}
return IsSame(Scalars, ReuseShuffleIndices);
}
bool isOperandGatherNode(const EdgeInfo &UserEI) const {
return isGather() && UserTreeIndices.front().EdgeIdx == UserEI.EdgeIdx &&
UserTreeIndices.front().UserTE == UserEI.UserTE;
}
/// \returns true if current entry has same operands as \p TE.
bool hasEqualOperands(const TreeEntry &TE) const {
if (TE.getNumOperands() != getNumOperands())
return false;
SmallBitVector Used(getNumOperands());
for (unsigned I = 0, E = getNumOperands(); I < E; ++I) {
unsigned PrevCount = Used.count();
for (unsigned K = 0; K < E; ++K) {
if (Used.test(K))
continue;
if (getOperand(K) == TE.getOperand(I)) {
Used.set(K);
break;
}
}
// Check if we actually found the matching operand.
if (PrevCount == Used.count())
return false;
}
return true;
}
/// \return Final vectorization factor for the node. Defined by the total
/// number of vectorized scalars, including those used several times in the
/// entry and counted in \a ReuseShuffleIndices, if any.
unsigned getVectorFactor() const {
if (!ReuseShuffleIndices.empty())
return ReuseShuffleIndices.size();
return Scalars.size();
}
/// Checks if the current node is a gather node.
bool isGather() const { return State == NeedToGather; }
/// A vector of scalars.
ValueList Scalars;
/// The Scalars are vectorized into this value. It is initialized to Null.
WeakTrackingVH VectorizedValue = nullptr;
/// New vector phi instructions emitted for the vectorized phi nodes.
PHINode *PHI = nullptr;
/// Do we need to gather this sequence or vectorize it
/// (either with vector instruction or with scatter/gather
/// intrinsics for store/load)?
enum EntryState {
Vectorize,
ScatterVectorize,
StridedVectorize,
NeedToGather
};
EntryState State;
/// Does this sequence require some shuffling?
SmallVector<int, 4> ReuseShuffleIndices;
/// Does this entry require reordering?
SmallVector<unsigned, 4> ReorderIndices;
/// Points back to the VectorizableTree.
///
/// Only used for Graphviz right now. Unfortunately GraphTrait::NodeRef has
/// to be a pointer and needs to be able to initialize the child iterator.
/// Thus we need a reference back to the container to translate the indices
/// to entries.
VecTreeTy &Container;
/// The TreeEntry index containing the user of this entry. We can actually
/// have multiple users so the data structure is not truly a tree.
SmallVector<EdgeInfo, 1> UserTreeIndices;
/// The index of this treeEntry in VectorizableTree.
int Idx = -1;
private:
/// The operands of each instruction in each lane Operands[op_index][lane].
/// Note: This helps avoid the replication of the code that performs the
/// reordering of operands during buildTree_rec() and vectorizeTree().
SmallVector<ValueList, 2> Operands;
/// The main/alternate instruction.
Instruction *MainOp = nullptr;
Instruction *AltOp = nullptr;
public:
/// Set this bundle's \p OpIdx'th operand to \p OpVL.
void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
if (Operands.size() < OpIdx + 1)
Operands.resize(OpIdx + 1);
assert(Operands[OpIdx].empty() && "Already resized?");
assert(OpVL.size() <= Scalars.size() &&
"Number of operands is greater than the number of scalars.");
Operands[OpIdx].resize(OpVL.size());
copy(OpVL, Operands[OpIdx].begin());
}
/// Set the operands of this bundle in their original order.
void setOperandsInOrder() {
assert(Operands.empty() && "Already initialized?");
auto *I0 = cast<Instruction>(Scalars[0]);
Operands.resize(I0->getNumOperands());
unsigned NumLanes = Scalars.size();
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
OpIdx != NumOperands; ++OpIdx) {
Operands[OpIdx].resize(NumLanes);
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
auto *I = cast<Instruction>(Scalars[Lane]);
assert(I->getNumOperands() == NumOperands &&
"Expected same number of operands");
Operands[OpIdx][Lane] = I->getOperand(OpIdx);
}
}
}
/// Reorders operands of the node to the given mask \p Mask.
void reorderOperands(ArrayRef<int> Mask) {
for (ValueList &Operand : Operands)
reorderScalars(Operand, Mask);
}
/// \returns the \p OpIdx operand of this TreeEntry.
ValueList &getOperand(unsigned OpIdx) {
assert(OpIdx < Operands.size() && "Off bounds");
return Operands[OpIdx];
}
/// \returns the \p OpIdx operand of this TreeEntry.
ArrayRef<Value *> getOperand(unsigned OpIdx) const {
assert(OpIdx < Operands.size() && "Off bounds");
return Operands[OpIdx];
}
/// \returns the number of operands.
unsigned getNumOperands() const { return Operands.size(); }
/// \return the single \p OpIdx operand.
Value *getSingleOperand(unsigned OpIdx) const {
assert(OpIdx < Operands.size() && "Off bounds");
assert(!Operands[OpIdx].empty() && "No operand available");
return Operands[OpIdx][0];
}
/// Some of the instructions in the list have alternate opcodes.
bool isAltShuffle() const { return MainOp != AltOp; }
bool isOpcodeOrAlt(Instruction *I) const {
unsigned CheckedOpcode = I->getOpcode();
return (getOpcode() == CheckedOpcode ||
getAltOpcode() == CheckedOpcode);
}
/// Chooses the correct key for scheduling data. If \p Op has the same (or
/// alternate) opcode as this entry's main/alternate opcode, the key is \p Op.
/// Otherwise the key is the main instruction.
Value *isOneOf(Value *Op) const {
auto *I = dyn_cast<Instruction>(Op);
if (I && isOpcodeOrAlt(I))
return Op;
return MainOp;
}
void setOperations(const InstructionsState &S) {
MainOp = S.MainOp;
AltOp = S.AltOp;
}
Instruction *getMainOp() const {
return MainOp;
}
Instruction *getAltOp() const {
return AltOp;
}
/// The main/alternate opcodes for the list of instructions.
unsigned getOpcode() const {
return MainOp ? MainOp->getOpcode() : 0;
}
unsigned getAltOpcode() const {
return AltOp ? AltOp->getOpcode() : 0;
}
/// When ReuseShuffleIndices is empty it just returns the position of \p
/// V within the vector of Scalars. Otherwise, tries to remap \p V via its
/// reuse index.
int findLaneForValue(Value *V) const {
unsigned FoundLane = std::distance(Scalars.begin(), find(Scalars, V));
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
if (!ReorderIndices.empty())
FoundLane = ReorderIndices[FoundLane];
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
if (!ReuseShuffleIndices.empty()) {
FoundLane = std::distance(ReuseShuffleIndices.begin(),
find(ReuseShuffleIndices, FoundLane));
}
return FoundLane;
}
/// Build a shuffle mask for graph entry which represents a merge of main
/// and alternate operations.
void
buildAltOpShuffleMask(const function_ref<bool(Instruction *)> IsAltOp,
SmallVectorImpl<int> &Mask,
SmallVectorImpl<Value *> *OpScalars = nullptr,
SmallVectorImpl<Value *> *AltScalars = nullptr) const;
/// Return true if this is a non-power-of-2 node.
bool isNonPowOf2Vec() const {
bool IsNonPowerOf2 = !isPowerOf2_32(Scalars.size());
assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) &&
"Reshuffling not supported with non-power-of-2 vectors yet.");
return IsNonPowerOf2;
}
#ifndef NDEBUG
/// Debug printer.
LLVM_DUMP_METHOD void dump() const {
dbgs() << Idx << ".\n";
for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) {
dbgs() << "Operand " << OpI << ":\n";
for (const Value *V : Operands[OpI])
dbgs().indent(2) << *V << "\n";
}
dbgs() << "Scalars: \n";
for (Value *V : Scalars)
dbgs().indent(2) << *V << "\n";
dbgs() << "State: ";
switch (State) {
case Vectorize:
dbgs() << "Vectorize\n";
break;
case ScatterVectorize:
dbgs() << "ScatterVectorize\n";
break;
case StridedVectorize:
dbgs() << "StridedVectorize\n";
break;
case NeedToGather:
dbgs() << "NeedToGather\n";
break;
}
dbgs() << "MainOp: ";
if (MainOp)
dbgs() << *MainOp << "\n";
else
dbgs() << "NULL\n";
dbgs() << "AltOp: ";
if (AltOp)
dbgs() << *AltOp << "\n";
else
dbgs() << "NULL\n";
dbgs() << "VectorizedValue: ";
if (VectorizedValue)
dbgs() << *VectorizedValue << "\n";
else
dbgs() << "NULL\n";
dbgs() << "ReuseShuffleIndices: ";
if (ReuseShuffleIndices.empty())
dbgs() << "Empty";
else
for (int ReuseIdx : ReuseShuffleIndices)
dbgs() << ReuseIdx << ", ";
dbgs() << "\n";
dbgs() << "ReorderIndices: ";
for (unsigned ReorderIdx : ReorderIndices)
dbgs() << ReorderIdx << ", ";
dbgs() << "\n";
dbgs() << "UserTreeIndices: ";
for (const auto &EInfo : UserTreeIndices)
dbgs() << EInfo << ", ";
dbgs() << "\n";
}
#endif
};
#ifndef NDEBUG
void dumpTreeCosts(const TreeEntry *E, InstructionCost ReuseShuffleCost,
InstructionCost VecCost, InstructionCost ScalarCost,
StringRef Banner) const {
dbgs() << "SLP: " << Banner << ":\n";
E->dump();
dbgs() << "SLP: Costs:\n";
dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n";
dbgs() << "SLP: VectorCost = " << VecCost << "\n";
dbgs() << "SLP: ScalarCost = " << ScalarCost << "\n";
dbgs() << "SLP: ReuseShuffleCost + VecCost - ScalarCost = "
<< ReuseShuffleCost + VecCost - ScalarCost << "\n";
}
#endif
/// Create a new VectorizableTree entry.
TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
std::optional<ScheduleData *> Bundle,
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
ArrayRef<int> ReuseShuffleIndices = std::nullopt,
ArrayRef<unsigned> ReorderIndices = std::nullopt) {
TreeEntry::EntryState EntryState =
Bundle ? TreeEntry::Vectorize : TreeEntry::NeedToGather;
return newTreeEntry(VL, EntryState, Bundle, S, UserTreeIdx,
ReuseShuffleIndices, ReorderIndices);
}
TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
TreeEntry::EntryState EntryState,
std::optional<ScheduleData *> Bundle,
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
ArrayRef<int> ReuseShuffleIndices = std::nullopt,
ArrayRef<unsigned> ReorderIndices = std::nullopt) {
assert(((!Bundle && EntryState == TreeEntry::NeedToGather) ||
(Bundle && EntryState != TreeEntry::NeedToGather)) &&
"Need to vectorize gather entry?");
VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
TreeEntry *Last = VectorizableTree.back().get();
Last->Idx = VectorizableTree.size() - 1;
Last->State = EntryState;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
ReuseShuffleIndices.end());
if (ReorderIndices.empty()) {
Last->Scalars.assign(VL.begin(), VL.end());
Last->setOperations(S);
} else {
// Reorder scalars and build final mask.
Last->Scalars.assign(VL.size(), nullptr);
transform(ReorderIndices, Last->Scalars.begin(),
[VL](unsigned Idx) -> Value * {
if (Idx >= VL.size())
return UndefValue::get(VL.front()->getType());
return VL[Idx];
});
InstructionsState S = getSameOpcode(Last->Scalars, *TLI);
Last->setOperations(S);
Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
}
if (!Last->isGather()) {
for (Value *V : VL) {
const TreeEntry *TE = getTreeEntry(V);
assert((!TE || TE == Last || doesNotNeedToBeScheduled(V)) &&
"Scalar already in tree!");
if (TE) {
if (TE != Last)
MultiNodeScalars.try_emplace(V).first->getSecond().push_back(Last);
continue;
}
ScalarToTreeEntry[V] = Last;
}
// Update the scheduler bundle to point to this TreeEntry.
ScheduleData *BundleMember = *Bundle;
assert((BundleMember || isa<PHINode>(S.MainOp) ||
isVectorLikeInstWithConstOps(S.MainOp) ||
doesNotNeedToSchedule(VL)) &&
"Bundle and VL out of sync");
if (BundleMember) {
for (Value *V : VL) {
if (doesNotNeedToBeScheduled(V))
continue;
if (!BundleMember)
continue;
BundleMember->TE = Last;
BundleMember = BundleMember->NextInBundle;
}
}
assert(!BundleMember && "Bundle and VL out of sync");
} else {
// Build a map for gathered scalars to the nodes where they are used.
bool AllConstsOrCasts = true;
for (Value *V : VL)
if (!isConstant(V)) {
auto *I = dyn_cast<CastInst>(V);
AllConstsOrCasts &= I && I->getType()->isIntegerTy();
ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last);
}
if (AllConstsOrCasts)
CastMaxMinBWSizes =
std::make_pair(std::numeric_limits<unsigned>::max(), 1);
MustGather.insert(VL.begin(), VL.end());
}
if (UserTreeIdx.UserTE) {
Last->UserTreeIndices.push_back(UserTreeIdx);
assert((!Last->isNonPowOf2Vec() || Last->ReorderIndices.empty()) &&
"Reordering isn't implemented for non-power-of-2 nodes yet");
}
return Last;
}
/// -- Vectorization State --
/// Holds all of the tree entries.
TreeEntry::VecTreeTy VectorizableTree;
#ifndef NDEBUG
/// Debug printer.
LLVM_DUMP_METHOD void dumpVectorizableTree() const {
for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) {
VectorizableTree[Id]->dump();
dbgs() << "\n";
}
}
#endif
TreeEntry *getTreeEntry(Value *V) { return ScalarToTreeEntry.lookup(V); }
const TreeEntry *getTreeEntry(Value *V) const {
return ScalarToTreeEntry.lookup(V);
}
/// Check that the operand node of the alternate node does not generate a
/// buildvector sequence. If it does, it is probably not worth building an
/// alternate shuffle when the number of buildvector operands plus the
/// alternate instruction exceeds the number of buildvector instructions.
/// \param S the instructions state of the analyzed values.
/// \param VL list of the instructions with alternate opcodes.
bool areAltOperandsProfitable(const InstructionsState &S,
ArrayRef<Value *> VL) const;
/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
TreeEntry::EntryState getScalarsVectorizationState(
InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const;
/// Maps a specific scalar to its tree entry.
SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
/// Maps scalars used in several vectorized nodes to the list of those
/// nodes.
SmallDenseMap<Value *, SmallVector<TreeEntry *>> MultiNodeScalars;
/// Maps a value to the proposed vectorizable size.
SmallDenseMap<Value *, unsigned> InstrElementSize;
/// A list of scalars that we found that we need to keep as scalars.
ValueSet MustGather;
/// A set of first non-schedulable values.
ValueSet NonScheduledFirst;
/// A map between the vectorized entries and the last instructions in the
/// bundles. The bundles are built in use order, not in the def order of the
/// instructions. So, during the vectorization process we cannot rely on the
/// last instruction in the bundle being the last instruction in program
/// order, since the basic blocks are affected; the last instructions need
/// to be pre-gathered beforehand.
DenseMap<const TreeEntry *, Instruction *> EntryToLastInstruction;
/// List of gather nodes that depend on other gather/vector nodes and should
/// be emitted after the vector instruction emission process, to correctly
/// handle the order of the vector instructions and shuffles.
SetVector<const TreeEntry *> PostponedGathers;
using ValueToGatherNodesMap =
DenseMap<Value *, SmallPtrSet<const TreeEntry *, 4>>;
ValueToGatherNodesMap ValueToGatherNodes;
/// This POD struct describes one external user in the vectorized tree.
struct ExternalUser {
ExternalUser(Value *S, llvm::User *U, int L)
: Scalar(S), User(U), Lane(L) {}
// Which scalar in our function.
Value *Scalar;
// The user that uses the scalar.
llvm::User *User;
// Which lane does the scalar belong to.
int Lane;
};
using UserList = SmallVector<ExternalUser, 16>;
/// Checks if two instructions may access the same memory.
///
/// \p Loc1 is the location of \p Inst1. It is passed explicitly because it
/// is invariant in the calling loop.
bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1,
Instruction *Inst2) {
if (!Loc1.Ptr || !isSimple(Inst1) || !isSimple(Inst2))
return true;
// First check if the result is already in the cache.
AliasCacheKey Key = std::make_pair(Inst1, Inst2);
auto It = AliasCache.find(Key);
if (It != AliasCache.end())
return It->second;
bool Aliased = isModOrRefSet(BatchAA.getModRefInfo(Inst2, Loc1));
// Store the result in the cache.
AliasCache.try_emplace(Key, Aliased);
AliasCache.try_emplace(std::make_pair(Inst2, Inst1), Aliased);
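// The reversed key is cached as well, treating the aliasing query as
// symmetric.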
return Aliased;
}
using AliasCacheKey = std::pair<Instruction *, Instruction *>;
/// Cache for alias results.
/// TODO: consider moving this to the AliasAnalysis itself.
DenseMap<AliasCacheKey, bool> AliasCache;
// Cache for pointerMayBeCaptured calls inside AA. This is preserved
// globally through SLP because we don't perform any action which
// invalidates capture results.
BatchAAResults BatchAA;
/// Temporary store for deleted instructions. Instructions will be deleted
/// eventually when the BoUpSLP is destructed. The deferral is required to
/// ensure that there are no incorrect collisions in the AliasCache, which
/// can happen if a new instruction is allocated at the same address as a
/// previously deleted instruction.
DenseSet<Instruction *> DeletedInstructions;
/// Set of the instruction, being analyzed already for reductions.
SmallPtrSet<Instruction *, 16> AnalyzedReductionsRoots;
/// Set of hashes for the list of reduction values already being analyzed.
DenseSet<size_t> AnalyzedReductionVals;
/// Values already analyzed for minimal bitwidth and found to be
/// non-profitable.
DenseSet<Value *> AnalyzedMinBWVals;
/// A list of values that need to be extracted out of the tree.
/// This list holds pairs of (Internal Scalar : External User). External User
/// can be nullptr, it means that this Internal Scalar will be used later,
/// after vectorization.
UserList ExternalUses;
/// A list of GEPs which can be replaced by scalar GEPs instead of
/// extractelement instructions.
SmallPtrSet<Value *, 4> ExternalUsesAsGEPs;
/// Values used only by @llvm.assume calls.
SmallPtrSet<const Value *, 32> EphValues;
/// Holds all of the instructions that we gathered, shuffle instructions and
/// extractelements.
SetVector<Instruction *> GatherShuffleExtractSeq;
/// A list of blocks that we are going to CSE.
DenseSet<BasicBlock *> CSEBlocks;
/// Contains all scheduling relevant data for an instruction.
/// A ScheduleData either represents a single instruction or a member of an
/// instruction bundle (= a group of instructions which is combined into a
/// vector instruction).
struct ScheduleData {
// The initial value for the dependency counters. It means that the
// dependencies are not calculated yet.
enum { InvalidDeps = -1 };
ScheduleData() = default;
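/// (Re-)initializes this node for use in the scheduling region identified
/// by \p BlockSchedulingRegionID, attached to the value \p OpVal.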
void init(int BlockSchedulingRegionID, Value *OpVal) {
FirstInBundle = this;
NextInBundle = nullptr;
NextLoadStore = nullptr;
IsScheduled = false;
SchedulingRegionID = BlockSchedulingRegionID;
clearDependencies();
OpValue = OpVal;
TE = nullptr;
}
/// Verify basic self-consistency properties.
void verify() {
if (hasValidDependencies()) {
assert(UnscheduledDeps <= Dependencies && "invariant");
} else {
assert(UnscheduledDeps == Dependencies && "invariant");
}
if (IsScheduled) {
assert(isSchedulingEntity() &&
"unexpected scheduled state");
for (const ScheduleData *BundleMember = this; BundleMember;
BundleMember = BundleMember->NextInBundle) {
assert(BundleMember->hasValidDependencies() &&
BundleMember->UnscheduledDeps == 0 &&
"unexpected scheduled state");
assert((BundleMember == this || !BundleMember->IsScheduled) &&
"only bundle is marked scheduled");
}
}
assert(Inst->getParent() == FirstInBundle->Inst->getParent() &&
"all bundle members must be in same basic block");
}
/// Returns true if the dependency information has been calculated.
/// Note that dependency validity can vary between instructions within
/// a single bundle.
bool hasValidDependencies() const { return Dependencies != InvalidDeps; }
/// Returns true for single instructions and for bundle representatives
/// (= the head of a bundle).
bool isSchedulingEntity() const { return FirstInBundle == this; }
/// Returns true if it represents an instruction bundle and not only a
/// single instruction.
bool isPartOfBundle() const {
return NextInBundle != nullptr || FirstInBundle != this || TE;
}
/// Returns true if it is ready for scheduling, i.e. it has no more
/// unscheduled dependent instructions/bundles.
bool isReady() const {
assert(isSchedulingEntity() &&
"can't consider non-scheduling entity for ready list");
return unscheduledDepsInBundle() == 0 && !IsScheduled;
}
/// Modifies the number of unscheduled dependencies for this instruction,
/// and returns the number of remaining dependencies for the containing
/// bundle.
int incrementUnscheduledDeps(int Incr) {
assert(hasValidDependencies() &&
"increment of unscheduled deps would be meaningless");
UnscheduledDeps += Incr;
return FirstInBundle->unscheduledDepsInBundle();
}
/// Sets the number of unscheduled dependencies to the number of
/// dependencies.
void resetUnscheduledDeps() {
UnscheduledDeps = Dependencies;
}
/// Clears all dependency information.
void clearDependencies() {
Dependencies = InvalidDeps;
resetUnscheduledDeps();
MemoryDependencies.clear();
ControlDependencies.clear();
}
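/// Returns the sum of unscheduled dependencies over all bundle members,
/// or InvalidDeps if any member's dependencies are not calculated yet.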
int unscheduledDepsInBundle() const {
assert(isSchedulingEntity() && "only meaningful on the bundle");
int Sum = 0;
for (const ScheduleData *BundleMember = this; BundleMember;
BundleMember = BundleMember->NextInBundle) {
if (BundleMember->UnscheduledDeps == InvalidDeps)
return InvalidDeps;
Sum += BundleMember->UnscheduledDeps;
}
return Sum;
}
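/// Prints the node: a non-head bundle member as "/ <inst>", the head of a
/// bundle as "[<inst>;<inst>;...]", and a single instruction as-is.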
void dump(raw_ostream &os) const {
if (!isSchedulingEntity()) {
os << "/ " << *Inst;
} else if (NextInBundle) {
os << '[' << *Inst;
ScheduleData *SD = NextInBundle;
while (SD) {
os << ';' << *SD->Inst;
SD = SD->NextInBundle;
}
os << ']';
} else {
os << *Inst;
}
}
Instruction *Inst = nullptr;
/// Opcode of the current instruction in the schedule data.
Value *OpValue = nullptr;
/// The TreeEntry that this instruction corresponds to.
TreeEntry *TE = nullptr;
/// Points to the head in an instruction bundle (and always to this for
/// single instructions).
ScheduleData *FirstInBundle = nullptr;
/// Single linked list of all instructions in a bundle. Null if it is a
/// single instruction.
ScheduleData *NextInBundle = nullptr;
/// Single linked list of all memory instructions (e.g. load, store, call)
/// in the block - until the end of the scheduling region.
ScheduleData *NextLoadStore = nullptr;
/// The dependent memory instructions.
/// This list is derived on demand in calculateDependencies().
SmallVector<ScheduleData *, 4> MemoryDependencies;
/// List of instructions which this instruction could be control dependent
/// on. Allowing such nodes to be scheduled below this one could introduce
/// a runtime fault which didn't exist in the original program.
/// ex: this is a load or udiv following a readonly call which inf loops
SmallVector<ScheduleData *, 4> ControlDependencies;
/// This ScheduleData is in the current scheduling region if this matches
/// the current SchedulingRegionID of BlockScheduling.
int SchedulingRegionID = 0;
/// Used for getting a "good" final ordering of instructions.
int SchedulingPriority = 0;
/// The number of dependencies. Consists of the number of users of the
/// instruction plus the number of dependent memory instructions (if any).
/// This value is calculated on demand.
/// If InvalidDeps, the number of dependencies is not calculated yet.
int Dependencies = InvalidDeps;
/// The number of dependencies minus the number of dependencies of scheduled
/// instructions. As soon as this is zero, the instruction/bundle gets ready
/// for scheduling.
/// Note that this is negative as long as Dependencies is not calculated.
int UnscheduledDeps = InvalidDeps;
/// True if this instruction is scheduled (or considered as scheduled in the
/// dry-run).
bool IsScheduled = false;
};
#ifndef NDEBUG
friend inline raw_ostream &operator<<(raw_ostream &os,
const BoUpSLP::ScheduleData &SD) {
SD.dump(os);
return os;
}
#endif
friend struct GraphTraits<BoUpSLP *>;
friend struct DOTGraphTraits<BoUpSLP *>;
/// Contains all scheduling data for a basic block.
/// It does not schedule instructions that are not memory read/write
/// instructions and whose operands are either constants, arguments, phis,
/// or instructions from other blocks, or whose users are phis or belong to
/// other blocks. The resulting vector instructions can be placed at the
/// beginning of the basic block without scheduling (if the operands do not
/// need to be scheduled) or at the end of the block (if the users are
/// outside of the block). This saves some compile time and memory used by
/// the compiler.
/// ScheduleData is assigned to each instruction between the boundaries of
/// the tree entry, even to those which are not part of the graph. It is
/// required to correctly follow the dependencies between the instructions
/// and to schedule them correctly. ScheduleData is not allocated for
/// instructions which do not require scheduling, like phis, nodes with
/// extractelements/insertelements only, or nodes whose instructions have
/// uses/operands outside of the block.
struct BlockScheduling {
BlockScheduling(BasicBlock *BB)
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {}
void clear() {
ReadyInsts.clear();
ScheduleStart = nullptr;
ScheduleEnd = nullptr;
FirstLoadStoreInRegion = nullptr;
LastLoadStoreInRegion = nullptr;
RegionHasStackSave = false;
// Reduce the maximum schedule region size by the size of the
// previous scheduling run.
ScheduleRegionSizeLimit -= ScheduleRegionSize;
if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
ScheduleRegionSizeLimit = MinScheduleRegionSize;
ScheduleRegionSize = 0;
// Make a new scheduling region, i.e. all existing ScheduleData is not
// in the new region yet.
++SchedulingRegionID;
}
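/// Returns the ScheduleData for \p I if it exists and belongs to the
/// current scheduling region; returns null otherwise.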
ScheduleData *getScheduleData(Instruction *I) {
if (BB != I->getParent())
// Avoid lookup if can't possibly be in map.
return nullptr;
ScheduleData *SD = ScheduleDataMap.lookup(I);
if (SD && isInSchedulingRegion(SD))
return SD;
return nullptr;
}
ScheduleData *getScheduleData(Value *V) {
if (auto *I = dyn_cast<Instruction>(V))
return getScheduleData(I);
return nullptr;
}
ScheduleData *getScheduleData(Value *V, Value *Key) {
if (V == Key)
return getScheduleData(V);
auto I = ExtraScheduleDataMap.find(V);
if (I != ExtraScheduleDataMap.end()) {
ScheduleData *SD = I->second.lookup(Key);
if (SD && isInSchedulingRegion(SD))
return SD;
}
return nullptr;
}
bool isInSchedulingRegion(ScheduleData *SD) const {
return SD->SchedulingRegionID == SchedulingRegionID;
}
/// Marks an instruction as scheduled and puts all dependent ready
/// instructions into the ready-list.
template <typename ReadyListType>
void schedule(ScheduleData *SD, ReadyListType &ReadyList) {
SD->IsScheduled = true;
LLVM_DEBUG(dbgs() << "SLP: schedule " << *SD << "\n");
for (ScheduleData *BundleMember = SD; BundleMember;
BundleMember = BundleMember->NextInBundle) {
if (BundleMember->Inst != BundleMember->OpValue)
continue;
// Handle the def-use chain dependencies.
// Decrement the unscheduled counter and insert to ready list if ready.
auto &&DecrUnsched = [this, &ReadyList](Instruction *I) {
doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) {
if (OpDef && OpDef->hasValidDependencies() &&
OpDef->incrementUnscheduledDeps(-1) == 0) {
// There are no more unscheduled dependencies after
// decrementing, so we can put the dependent instruction
// into the ready list.
ScheduleData *DepBundle = OpDef->FirstInBundle;
assert(!DepBundle->IsScheduled &&
"already scheduled bundle gets ready");
ReadyList.insert(DepBundle);
LLVM_DEBUG(dbgs()
<< "SLP: gets ready (def): " << *DepBundle << "\n");
}
});
};
// If BundleMember is a vector bundle, its operands may have been
// reordered during buildTree(). We therefore need to get its operands
// through the TreeEntry.
if (TreeEntry *TE = BundleMember->TE) {
// Need to search for the lane since the tree entry can be reordered.
int Lane = std::distance(TE->Scalars.begin(),
find(TE->Scalars, BundleMember->Inst));
assert(Lane >= 0 && "Lane not set");
// Since the vectorization tree is built recursively, this assertion
// ensures that the tree entry has all operands set before reaching
// this code. A couple of exceptions known at the moment are extracts
// where the second (immediate) operand is not added. Since
// immediates do not affect scheduler behavior, this is considered
// okay.
auto *In = BundleMember->Inst;
assert(
In &&
(isa<ExtractValueInst, ExtractElementInst, IntrinsicInst>(In) ||
In->getNumOperands() == TE->getNumOperands()) &&
"Missed TreeEntry operands?");
(void)In; // fake use to avoid build failure when assertions disabled
for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands();
OpIdx != NumOperands; ++OpIdx)
if (auto *I = dyn_cast<Instruction>(TE->getOperand(OpIdx)[Lane]))
DecrUnsched(I);
} else {
// If BundleMember is a stand-alone instruction, no operand reordering
// has taken place, so we directly access its operands.
for (Use &U : BundleMember->Inst->operands())
if (auto *I = dyn_cast<Instruction>(U.get()))
DecrUnsched(I);
}
// Handle the memory dependencies.
for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
if (MemoryDepSD->hasValidDependencies() &&
MemoryDepSD->incrementUnscheduledDeps(-1) == 0) {
// There are no more unscheduled dependencies after decrementing,
// so we can put the dependent instruction into the ready list.
ScheduleData *DepBundle = MemoryDepSD->FirstInBundle;
assert(!DepBundle->IsScheduled &&
"already scheduled bundle gets ready");
ReadyList.insert(DepBundle);
LLVM_DEBUG(dbgs()
<< "SLP: gets ready (mem): " << *DepBundle << "\n");
}
}
// Handle the control dependencies.
for (ScheduleData *DepSD : BundleMember->ControlDependencies) {
if (DepSD->incrementUnscheduledDeps(-1) == 0) {
// There are no more unscheduled dependencies after decrementing,
// so we can put the dependent instruction into the ready list.
ScheduleData *DepBundle = DepSD->FirstInBundle;
assert(!DepBundle->IsScheduled &&
"already scheduled bundle gets ready");
ReadyList.insert(DepBundle);
LLVM_DEBUG(dbgs()
<< "SLP: gets ready (ctl): " << *DepBundle << "\n");
}
}
}
}
/// Verify basic self-consistency properties of the data structure.
void verify() {
if (!ScheduleStart)
return;
assert(ScheduleStart->getParent() == ScheduleEnd->getParent() &&
ScheduleStart->comesBefore(ScheduleEnd) &&
"Not a valid scheduling region?");
for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
auto *SD = getScheduleData(I);
if (!SD)
continue;
assert(isInSchedulingRegion(SD) &&
"primary schedule data not in window?");
assert(isInSchedulingRegion(SD->FirstInBundle) &&
"entire bundle in window!");
(void)SD;
doForAllOpcodes(I, [](ScheduleData *SD) { SD->verify(); });
}
for (auto *SD : ReadyInsts) {
assert(SD->isSchedulingEntity() && SD->isReady() &&
"item in ready list not ready?");
(void)SD;
}
}
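/// Applies \p Action to the ScheduleData of \p V and to any extra
/// ScheduleData mapped to \p V that lies in the current scheduling region.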
void doForAllOpcodes(Value *V,
function_ref<void(ScheduleData *SD)> Action) {
if (ScheduleData *SD = getScheduleData(V))
Action(SD);
auto I = ExtraScheduleDataMap.find(V);
if (I != ExtraScheduleDataMap.end())
for (auto &P : I->second)
if (isInSchedulingRegion(P.second))
Action(P.second);
}
/// Put all instructions into the ReadyList which are ready for scheduling.
template <typename ReadyListType>
void initialFillReadyList(ReadyListType &ReadyList) {
for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
doForAllOpcodes(I, [&](ScheduleData *SD) {
if (SD->isSchedulingEntity() && SD->hasValidDependencies() &&
SD->isReady()) {
ReadyList.insert(SD);
LLVM_DEBUG(dbgs()
<< "SLP: initially in ready list: " << *SD << "\n");
}
});
}
}
/// Build a bundle from the ScheduleData nodes corresponding to the
/// scalar instruction for each lane.
ScheduleData *buildBundle(ArrayRef<Value *> VL);
/// Checks if a bundle of instructions can be scheduled, i.e. has no
/// cyclic dependencies. This is only a dry-run, no instructions are
/// actually moved at this stage.
/// \returns the scheduling bundle. The returned Optional value is not
/// std::nullopt if \p VL is allowed to be scheduled.
std::optional<ScheduleData *>
tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S);
/// Un-bundles a group of instructions.
void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
/// Allocates schedule data chunk.
ScheduleData *allocateScheduleDataChunks();
/// Extends the scheduling region so that V is inside the region.
/// \returns true if the region size is within the limit.
bool extendSchedulingRegion(Value *V, const InstructionsState &S);
/// Initialize the ScheduleData structures for new instructions in the
/// scheduling region.
void initScheduleData(Instruction *FromI, Instruction *ToI,
ScheduleData *PrevLoadStore,
ScheduleData *NextLoadStore);
/// Updates the dependency information of a bundle and of all instructions/
/// bundles which depend on the original bundle.
void calculateDependencies(ScheduleData *SD, bool InsertInReadyList,
BoUpSLP *SLP);
    /// Sets all instructions in the scheduling region to un-scheduled.
void resetSchedule();
BasicBlock *BB;
/// Simple memory allocation for ScheduleData.
SmallVector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks;
/// The size of a ScheduleData array in ScheduleDataChunks.
int ChunkSize;
/// The allocator position in the current chunk, which is the last entry
/// of ScheduleDataChunks.
int ChunkPos;
/// Attaches ScheduleData to Instruction.
/// Note that the mapping survives during all vectorization iterations, i.e.
/// ScheduleData structures are recycled.
DenseMap<Instruction *, ScheduleData *> ScheduleDataMap;
/// Attaches ScheduleData to Instruction with the leading key.
DenseMap<Value *, SmallDenseMap<Value *, ScheduleData *>>
ExtraScheduleDataMap;
/// The ready-list for scheduling (only used for the dry-run).
SetVector<ScheduleData *> ReadyInsts;
/// The first instruction of the scheduling region.
Instruction *ScheduleStart = nullptr;
/// The first instruction _after_ the scheduling region.
Instruction *ScheduleEnd = nullptr;
/// The first memory accessing instruction in the scheduling region
/// (can be null).
ScheduleData *FirstLoadStoreInRegion = nullptr;
/// The last memory accessing instruction in the scheduling region
/// (can be null).
ScheduleData *LastLoadStoreInRegion = nullptr;
/// Is there an llvm.stacksave or llvm.stackrestore in the scheduling
/// region? Used to optimize the dependence calculation for the
/// common case where there isn't.
bool RegionHasStackSave = false;
/// The current size of the scheduling region.
int ScheduleRegionSize = 0;
/// The maximum size allowed for the scheduling region.
int ScheduleRegionSizeLimit = ScheduleRegionSizeBudget;
    /// The ID of the scheduling region. For a new vectorization iteration this
    /// is incremented, which "removes" all ScheduleData from the region.
/// Make sure that the initial SchedulingRegionID is greater than the
/// initial SchedulingRegionID in ScheduleData (which is 0).
int SchedulingRegionID = 1;
};
/// Attaches the BlockScheduling structures to basic blocks.
MapVector<BasicBlock *, std::unique_ptr<BlockScheduling>> BlocksSchedules;
/// Performs the "real" scheduling. Done before vectorization is actually
/// performed in a basic block.
void scheduleBlock(BlockScheduling *BS);
/// List of users to ignore during scheduling and that don't need extracting.
const SmallDenseSet<Value *> *UserIgnoreList = nullptr;
/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of
/// sorted SmallVectors of unsigned.
struct OrdersTypeDenseMapInfo {
static OrdersType getEmptyKey() {
OrdersType V;
V.push_back(~1U);
return V;
}
static OrdersType getTombstoneKey() {
OrdersType V;
V.push_back(~2U);
return V;
}
static unsigned getHashValue(const OrdersType &V) {
return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
}
static bool isEqual(const OrdersType &LHS, const OrdersType &RHS) {
return LHS == RHS;
}
};
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
LoopInfo *LI;
DominatorTree *DT;
AssumptionCache *AC;
DemandedBits *DB;
const DataLayout *DL;
OptimizationRemarkEmitter *ORE;
unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt.
unsigned MinVecRegSize; // Set by cl::opt (default: 128).
/// Instruction builder to construct the vectorized tree.
IRBuilder<TargetFolder> Builder;
/// A map of scalar integer values to the smallest bit width with which they
/// can legally be represented. The values map to (width, signed) pairs,
/// where "width" indicates the minimum bit width and "signed" is True if the
/// value must be signed-extended, rather than zero-extended, back to its
/// original width.
DenseMap<const TreeEntry *, std::pair<uint64_t, bool>> MinBWs;
/// Final size of the reduced vector, if the current graph represents the
/// input for the reduction and it was possible to narrow the size of the
/// reduction.
unsigned ReductionBitWidth = 0;
/// If the tree contains any zext/sext/trunc nodes, contains max-min pair of
/// type sizes, used in the tree.
std::optional<std::pair<unsigned, unsigned>> CastMaxMinBWSizes;
  /// Indices of the vectorized nodes, which are supposed to be the roots of
  /// the new bitwidth analysis attempt, like trunc, IToFP or ICmp.
DenseSet<unsigned> ExtraBitWidthNodes;
};
} // end namespace slpvectorizer
template <> struct GraphTraits<BoUpSLP *> {
using TreeEntry = BoUpSLP::TreeEntry;
/// NodeRef has to be a pointer per the GraphWriter.
using NodeRef = TreeEntry *;
using ContainerTy = BoUpSLP::TreeEntry::VecTreeTy;
/// Add the VectorizableTree to the index iterator to be able to return
/// TreeEntry pointers.
struct ChildIteratorType
: public iterator_adaptor_base<
ChildIteratorType, SmallVector<BoUpSLP::EdgeInfo, 1>::iterator> {
ContainerTy &VectorizableTree;
ChildIteratorType(SmallVector<BoUpSLP::EdgeInfo, 1>::iterator W,
ContainerTy &VT)
: ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
NodeRef operator*() { return I->UserTE; }
};
static NodeRef getEntryNode(BoUpSLP &R) {
return R.VectorizableTree[0].get();
}
static ChildIteratorType child_begin(NodeRef N) {
return {N->UserTreeIndices.begin(), N->Container};
}
static ChildIteratorType child_end(NodeRef N) {
return {N->UserTreeIndices.end(), N->Container};
}
/// For the node iterator we just need to turn the TreeEntry iterator into a
/// TreeEntry* iterator so that it dereferences to NodeRef.
class nodes_iterator {
using ItTy = ContainerTy::iterator;
ItTy It;
public:
nodes_iterator(const ItTy &It2) : It(It2) {}
NodeRef operator*() { return It->get(); }
nodes_iterator operator++() {
++It;
return *this;
}
bool operator!=(const nodes_iterator &N2) const { return N2.It != It; }
};
static nodes_iterator nodes_begin(BoUpSLP *R) {
return nodes_iterator(R->VectorizableTree.begin());
}
static nodes_iterator nodes_end(BoUpSLP *R) {
return nodes_iterator(R->VectorizableTree.end());
}
static unsigned size(BoUpSLP *R) { return R->VectorizableTree.size(); }
};
template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
using TreeEntry = BoUpSLP::TreeEntry;
DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *R) {
std::string Str;
raw_string_ostream OS(Str);
OS << Entry->Idx << ".\n";
if (isSplat(Entry->Scalars))
OS << "<splat> ";
for (auto *V : Entry->Scalars) {
OS << *V;
if (llvm::any_of(R->ExternalUses, [&](const BoUpSLP::ExternalUser &EU) {
return EU.Scalar == V;
}))
OS << " <extract>";
OS << "\n";
}
return Str;
}
static std::string getNodeAttributes(const TreeEntry *Entry,
const BoUpSLP *) {
if (Entry->isGather())
return "color=red";
if (Entry->State == TreeEntry::ScatterVectorize ||
Entry->State == TreeEntry::StridedVectorize)
return "color=blue";
return "";
}
};
} // end namespace llvm
BoUpSLP::~BoUpSLP() {
SmallVector<WeakTrackingVH> DeadInsts;
for (auto *I : DeletedInstructions) {
if (!I->getParent()) {
      // Temporarily insert instructions back into their parent so that they
      // can be erased from the parent and from memory later.
if (isa<PHINode>(I))
// Phi nodes must be the very first instructions in the block.
I->insertBefore(F->getEntryBlock(),
F->getEntryBlock().getFirstNonPHIIt());
else
I->insertBefore(F->getEntryBlock().getTerminator());
continue;
}
for (Use &U : I->operands()) {
auto *Op = dyn_cast<Instruction>(U.get());
if (Op && !DeletedInstructions.count(Op) && Op->hasOneUser() &&
wouldInstructionBeTriviallyDead(Op, TLI))
DeadInsts.emplace_back(Op);
}
I->dropAllReferences();
}
for (auto *I : DeletedInstructions) {
assert(I->use_empty() &&
"trying to erase instruction with users.");
I->eraseFromParent();
}
// Cleanup any dead scalar code feeding the vectorized instructions
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI);
#ifdef EXPENSIVE_CHECKS
// If we could guarantee that this call is not extremely slow, we could
// remove the ifdef limitation (see PR47712).
assert(!verifyFunction(*F, &dbgs()));
#endif
}
/// Reorders the given \p Reuses mask according to the given \p Mask. \p Reuses
/// contains the original mask for the scalars reused in the node. The
/// procedure transforms this mask in accordance with the given \p Mask.
static void reorderReuses(SmallVectorImpl<int> &Reuses, ArrayRef<int> Mask) {
assert(!Mask.empty() && Reuses.size() == Mask.size() &&
"Expected non-empty mask.");
SmallVector<int> Prev(Reuses.begin(), Reuses.end());
Prev.swap(Reuses);
for (unsigned I = 0, E = Prev.size(); I < E; ++I)
if (Mask[I] != PoisonMaskElem)
Reuses[Mask[I]] = Prev[I];
}
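// Worked example (illustrative sketch, not part of the upstream sources):
// with Reuses = {3, 2, 1, 0} and Mask = {1, 0, 3, 2}, each Prev[I] moves to
// position Mask[I]:
//   Reuses[1] = Prev[0] = 3, Reuses[0] = Prev[1] = 2,
//   Reuses[3] = Prev[2] = 1, Reuses[2] = Prev[3] = 0,
// giving Reuses = {2, 3, 0, 1}.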
/// Reorders the given \p Order according to the given \p Mask. \p Order is
/// the original order of the scalars. The procedure transforms the provided
/// order in accordance with the given \p Mask. If the resulting \p Order is
/// just an identity order, \p Order is cleared.
static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask,
bool BottomOrder = false) {
assert(!Mask.empty() && "Expected non-empty mask.");
unsigned Sz = Mask.size();
if (BottomOrder) {
SmallVector<unsigned> PrevOrder;
if (Order.empty()) {
PrevOrder.resize(Sz);
std::iota(PrevOrder.begin(), PrevOrder.end(), 0);
} else {
PrevOrder.swap(Order);
}
Order.assign(Sz, Sz);
for (unsigned I = 0; I < Sz; ++I)
if (Mask[I] != PoisonMaskElem)
Order[I] = PrevOrder[Mask[I]];
if (all_of(enumerate(Order), [&](const auto &Data) {
return Data.value() == Sz || Data.index() == Data.value();
})) {
Order.clear();
return;
}
fixupOrderingIndices(Order);
return;
}
SmallVector<int> MaskOrder;
if (Order.empty()) {
MaskOrder.resize(Sz);
std::iota(MaskOrder.begin(), MaskOrder.end(), 0);
} else {
inversePermutation(Order, MaskOrder);
}
reorderReuses(MaskOrder, Mask);
if (ShuffleVectorInst::isIdentityMask(MaskOrder, Sz)) {
Order.clear();
return;
}
Order.assign(Sz, Sz);
for (unsigned I = 0; I < Sz; ++I)
if (MaskOrder[I] != PoisonMaskElem)
Order[MaskOrder[I]] = I;
fixupOrderingIndices(Order);
}
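// Worked example (illustrative sketch, not part of the upstream sources):
// for an empty (identity) Order of size 4 and Mask = {1, 0, 3, 2}, MaskOrder
// starts as the identity {0, 1, 2, 3}, reorderReuses turns it into
// {1, 0, 3, 2}, and the final inversion yields Order = {1, 0, 3, 2}. If
// Mask itself were the identity, Order would be cleared instead.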
std::optional<BoUpSLP::OrdersType>
BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
assert(TE.isGather() && "Expected gather node only.");
// Try to find subvector extract/insert patterns and reorder only such
// patterns.
SmallVector<Value *> GatheredScalars(TE.Scalars.begin(), TE.Scalars.end());
Type *ScalarTy = GatheredScalars.front()->getType();
int NumScalars = GatheredScalars.size();
if (!isValidElementType(ScalarTy))
return std::nullopt;
auto *VecTy = getWidenedType(ScalarTy, NumScalars);
int NumParts = TTI->getNumberOfParts(VecTy);
if (NumParts == 0 || NumParts >= NumScalars)
NumParts = 1;
SmallVector<int> ExtractMask;
SmallVector<int> Mask;
SmallVector<SmallVector<const TreeEntry *>> Entries;
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> ExtractShuffles =
tryToGatherExtractElements(GatheredScalars, ExtractMask, NumParts);
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles =
isGatherShuffledEntry(&TE, GatheredScalars, Mask, Entries, NumParts,
/*ForOrder=*/true);
// No shuffled operands - ignore.
if (GatherShuffles.empty() && ExtractShuffles.empty())
return std::nullopt;
OrdersType CurrentOrder(NumScalars, NumScalars);
if (GatherShuffles.size() == 1 &&
*GatherShuffles.front() == TTI::SK_PermuteSingleSrc &&
Entries.front().front()->isSame(TE.Scalars)) {
// Perfect match in the graph, will reuse the previously vectorized
// node. Cost is 0.
std::iota(CurrentOrder.begin(), CurrentOrder.end(), 0);
return CurrentOrder;
}
auto IsSplatMask = [](ArrayRef<int> Mask) {
int SingleElt = PoisonMaskElem;
return all_of(Mask, [&](int I) {
if (SingleElt == PoisonMaskElem && I != PoisonMaskElem)
SingleElt = I;
return I == PoisonMaskElem || I == SingleElt;
});
};
// Exclusive broadcast mask - ignore.
if ((ExtractShuffles.empty() && IsSplatMask(Mask) &&
(Entries.size() != 1 ||
Entries.front().front()->ReorderIndices.empty())) ||
(GatherShuffles.empty() && IsSplatMask(ExtractMask)))
return std::nullopt;
SmallBitVector ShuffledSubMasks(NumParts);
auto TransformMaskToOrder = [&](MutableArrayRef<unsigned> CurrentOrder,
ArrayRef<int> Mask, int PartSz, int NumParts,
function_ref<unsigned(unsigned)> GetVF) {
for (int I : seq<int>(0, NumParts)) {
if (ShuffledSubMasks.test(I))
continue;
const int VF = GetVF(I);
if (VF == 0)
continue;
unsigned Limit = getNumElems(CurrentOrder.size(), PartSz, I);
MutableArrayRef<unsigned> Slice = CurrentOrder.slice(I * PartSz, Limit);
// Shuffle of at least 2 vectors - ignore.
if (any_of(Slice, [&](int I) { return I != NumScalars; })) {
std::fill(Slice.begin(), Slice.end(), NumScalars);
ShuffledSubMasks.set(I);
continue;
}
      // Try to include as many elements from the mask as possible.
int FirstMin = INT_MAX;
      bool SecondVecFound = false;
for (int K : seq<int>(Limit)) {
int Idx = Mask[I * PartSz + K];
if (Idx == PoisonMaskElem) {
Value *V = GatheredScalars[I * PartSz + K];
if (isConstant(V) && !isa<PoisonValue>(V)) {
SecondVecFound = true;
break;
}
continue;
}
if (Idx < VF) {
if (FirstMin > Idx)
FirstMin = Idx;
} else {
SecondVecFound = true;
break;
}
}
FirstMin = (FirstMin / PartSz) * PartSz;
// Shuffle of at least 2 vectors - ignore.
if (SecondVecFound) {
std::fill(Slice.begin(), Slice.end(), NumScalars);
ShuffledSubMasks.set(I);
continue;
}
for (int K : seq<int>(Limit)) {
int Idx = Mask[I * PartSz + K];
if (Idx == PoisonMaskElem)
continue;
Idx -= FirstMin;
if (Idx >= PartSz) {
SecondVecFound = true;
break;
}
if (CurrentOrder[I * PartSz + Idx] >
static_cast<unsigned>(I * PartSz + K) &&
CurrentOrder[I * PartSz + Idx] !=
static_cast<unsigned>(I * PartSz + Idx))
CurrentOrder[I * PartSz + Idx] = I * PartSz + K;
}
// Shuffle of at least 2 vectors - ignore.
if (SecondVecFound) {
std::fill(Slice.begin(), Slice.end(), NumScalars);
ShuffledSubMasks.set(I);
continue;
}
}
};
int PartSz = getPartNumElems(NumScalars, NumParts);
if (!ExtractShuffles.empty())
TransformMaskToOrder(
CurrentOrder, ExtractMask, PartSz, NumParts, [&](unsigned I) {
if (!ExtractShuffles[I])
return 0U;
unsigned VF = 0;
unsigned Sz = getNumElems(TE.getVectorFactor(), PartSz, I);
for (unsigned Idx : seq<unsigned>(Sz)) {
int K = I * PartSz + Idx;
if (ExtractMask[K] == PoisonMaskElem)
continue;
if (!TE.ReuseShuffleIndices.empty())
K = TE.ReuseShuffleIndices[K];
if (!TE.ReorderIndices.empty())
K = std::distance(TE.ReorderIndices.begin(),
find(TE.ReorderIndices, K));
auto *EI = dyn_cast<ExtractElementInst>(TE.Scalars[K]);
if (!EI)
continue;
VF = std::max(VF, cast<VectorType>(EI->getVectorOperandType())
->getElementCount()
.getKnownMinValue());
}
return VF;
});
// Check special corner case - single shuffle of the same entry.
if (GatherShuffles.size() == 1 && NumParts != 1) {
if (ShuffledSubMasks.any())
return std::nullopt;
PartSz = NumScalars;
NumParts = 1;
}
if (!Entries.empty())
TransformMaskToOrder(CurrentOrder, Mask, PartSz, NumParts, [&](unsigned I) {
if (!GatherShuffles[I])
return 0U;
return std::max(Entries[I].front()->getVectorFactor(),
Entries[I].back()->getVectorFactor());
});
int NumUndefs =
count_if(CurrentOrder, [&](int Idx) { return Idx == NumScalars; });
if (ShuffledSubMasks.all() || (NumScalars > 2 && NumUndefs >= NumScalars / 2))
return std::nullopt;
return std::move(CurrentOrder);
}
static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
const TargetLibraryInfo &TLI,
bool CompareOpcodes = true) {
if (getUnderlyingObject(Ptr1) != getUnderlyingObject(Ptr2))
return false;
auto *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
if (!GEP1)
return false;
auto *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
if (!GEP2)
return false;
return GEP1->getNumOperands() == 2 && GEP2->getNumOperands() == 2 &&
((isConstant(GEP1->getOperand(1)) &&
isConstant(GEP2->getOperand(1))) ||
!CompareOpcodes ||
getSameOpcode({GEP1->getOperand(1), GEP2->getOperand(1)}, TLI)
.getOpcode());
}
/// Calculates minimal alignment as a common alignment.
template <typename T>
static Align computeCommonAlignment(ArrayRef<Value *> VL) {
Align CommonAlignment = cast<T>(VL.front())->getAlign();
for (Value *V : VL.drop_front())
CommonAlignment = std::min(CommonAlignment, cast<T>(V)->getAlign());
return CommonAlignment;
}
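// For example (illustrative only): for three loads aligned to 16, 8 and 4
// bytes, the common alignment is Align(4), the minimum over the list.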
/// Check if \p Order represents reverse order.
static bool isReverseOrder(ArrayRef<unsigned> Order) {
unsigned Sz = Order.size();
return !Order.empty() && all_of(enumerate(Order), [&](const auto &Pair) {
return Pair.value() == Sz || Sz - Pair.index() - 1 == Pair.value();
});
}
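// For example (illustrative only): Order = {3, 2, 1, 0} is a reverse order
// for Sz == 4, and so is {4, 2, 1, 0}, since entries equal to Sz are treated
// as "unset" and accepted.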
/// Checks if the provided list of pointers \p PointerOps represents strided
/// pointers for type ElemTy. If they are not, std::nullopt is returned.
/// Otherwise, if \p Inst is not specified, an engaged optional value is
/// returned to show that the pointers represent strided pointers. If \p Inst
/// is specified, the runtime stride is materialized before the given \p Inst.
/// \returns std::nullopt if the pointers do not have a runtime stride;
/// nullptr or the actual stride value, otherwise.
static std::optional<Value *>
calculateRtStride(ArrayRef<Value *> PointerOps, Type *ElemTy,
const DataLayout &DL, ScalarEvolution &SE,
SmallVectorImpl<unsigned> &SortedIndices,
Instruction *Inst = nullptr) {
SmallVector<const SCEV *> SCEVs;
const SCEV *PtrSCEVLowest = nullptr;
const SCEV *PtrSCEVHighest = nullptr;
// Find lower/upper pointers from the PointerOps (i.e. with lowest and highest
// addresses).
for (Value *Ptr : PointerOps) {
const SCEV *PtrSCEV = SE.getSCEV(Ptr);
if (!PtrSCEV)
return std::nullopt;
SCEVs.push_back(PtrSCEV);
if (!PtrSCEVLowest && !PtrSCEVHighest) {
PtrSCEVLowest = PtrSCEVHighest = PtrSCEV;
continue;
}
const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVLowest);
if (isa<SCEVCouldNotCompute>(Diff))
return std::nullopt;
if (Diff->isNonConstantNegative()) {
PtrSCEVLowest = PtrSCEV;
continue;
}
const SCEV *Diff1 = SE.getMinusSCEV(PtrSCEVHighest, PtrSCEV);
if (isa<SCEVCouldNotCompute>(Diff1))
return std::nullopt;
if (Diff1->isNonConstantNegative()) {
PtrSCEVHighest = PtrSCEV;
continue;
}
}
// Dist = PtrSCEVHighest - PtrSCEVLowest;
const SCEV *Dist = SE.getMinusSCEV(PtrSCEVHighest, PtrSCEVLowest);
if (isa<SCEVCouldNotCompute>(Dist))
return std::nullopt;
int Size = DL.getTypeStoreSize(ElemTy);
auto TryGetStride = [&](const SCEV *Dist,
const SCEV *Multiplier) -> const SCEV * {
if (const auto *M = dyn_cast<SCEVMulExpr>(Dist)) {
if (M->getOperand(0) == Multiplier)
return M->getOperand(1);
if (M->getOperand(1) == Multiplier)
return M->getOperand(0);
return nullptr;
}
if (Multiplier == Dist)
return SE.getConstant(Dist->getType(), 1);
return SE.getUDivExactExpr(Dist, Multiplier);
};
  // Stride_in_elements = Dist / (element_size * (num_elems - 1)).
const SCEV *Stride = nullptr;
if (Size != 1 || SCEVs.size() > 2) {
const SCEV *Sz = SE.getConstant(Dist->getType(), Size * (SCEVs.size() - 1));
Stride = TryGetStride(Dist, Sz);
if (!Stride)
return std::nullopt;
}
if (!Stride || isa<SCEVConstant>(Stride))
return std::nullopt;
  // Iterate through all pointers and check if all distances are
  // unique multiples of Stride.
using DistOrdPair = std::pair<int64_t, int>;
auto Compare = llvm::less_first();
std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
int Cnt = 0;
bool IsConsecutive = true;
for (const SCEV *PtrSCEV : SCEVs) {
unsigned Dist = 0;
if (PtrSCEV != PtrSCEVLowest) {
const SCEV *Diff = SE.getMinusSCEV(PtrSCEV, PtrSCEVLowest);
const SCEV *Coeff = TryGetStride(Diff, Stride);
if (!Coeff)
return std::nullopt;
const auto *SC = dyn_cast<SCEVConstant>(Coeff);
if (!SC || isa<SCEVCouldNotCompute>(SC))
return std::nullopt;
if (!SE.getMinusSCEV(PtrSCEV, SE.getAddExpr(PtrSCEVLowest,
SE.getMulExpr(Stride, SC)))
->isZero())
return std::nullopt;
Dist = SC->getAPInt().getZExtValue();
}
    // If the offset is not a multiple of the element size, or falls outside
    // the vector, we can't vectorize.
if ((Dist / Size) * Size != Dist || (Dist / Size) >= SCEVs.size())
return std::nullopt;
auto Res = Offsets.emplace(Dist, Cnt);
if (!Res.second)
return std::nullopt;
// Consecutive order if the inserted element is the last one.
IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
++Cnt;
}
if (Offsets.size() != SCEVs.size())
return std::nullopt;
SortedIndices.clear();
if (!IsConsecutive) {
// Fill SortedIndices array only if it is non-consecutive.
SortedIndices.resize(PointerOps.size());
Cnt = 0;
for (const std::pair<int64_t, int> &Pair : Offsets) {
SortedIndices[Cnt] = Pair.second;
++Cnt;
}
}
if (!Inst)
return nullptr;
SCEVExpander Expander(SE, DL, "strided-load-vec");
return Expander.expandCodeFor(Stride, Stride->getType(), Inst);
}
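// Worked example (illustrative sketch, not part of the upstream sources):
// for i8 pointers {p, p + s, p + 2 * s, p + 3 * s} with a runtime value s,
// Dist is 3 * s, TryGetStride extracts Stride = s, and the per-pointer
// coefficients are {0, 1, 2, 3} - unique and consecutive - so SortedIndices
// stays empty and the function returns nullptr (or the materialized stride
// value if \p Inst is given).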
static std::pair<InstructionCost, InstructionCost>
getGEPCosts(const TargetTransformInfo &TTI, ArrayRef<Value *> Ptrs,
Value *BasePtr, unsigned Opcode, TTI::TargetCostKind CostKind,
Type *ScalarTy, VectorType *VecTy);
BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
ArrayRef<Value *> VL, const Value *VL0, SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps, bool TryRecursiveCheck) const {
// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.
Type *ScalarTy = VL0->getType();
if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy))
return LoadsState::Gather;
// Make sure all loads in the bundle are simple - we can't vectorize
// atomic or volatile loads.
PointerOps.clear();
const unsigned Sz = VL.size();
PointerOps.resize(Sz);
auto *POIter = PointerOps.begin();
for (Value *V : VL) {
auto *L = cast<LoadInst>(V);
if (!L->isSimple())
return LoadsState::Gather;
*POIter = L->getPointerOperand();
++POIter;
}
Order.clear();
auto *VecTy = getWidenedType(ScalarTy, Sz);
// Check the order of pointer operands or that all pointers are the same.
bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, Order);
// FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
if (!Order.empty() && !isPowerOf2_32(VL.size())) {
assert(VectorizeNonPowerOf2 && "non-power-of-2 number of loads only "
"supported with VectorizeNonPowerOf2");
return LoadsState::Gather;
}
Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
if (!IsSorted && Sz > MinProfitableStridedLoads && TTI->isTypeLegal(VecTy) &&
TTI->isLegalStridedLoadStore(VecTy, CommonAlignment) &&
calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order))
return LoadsState::StridedVectorize;
if (IsSorted || all_of(PointerOps, [&](Value *P) {
return arePointersCompatible(P, PointerOps.front(), *TLI);
})) {
if (IsSorted) {
Value *Ptr0;
Value *PtrN;
if (Order.empty()) {
Ptr0 = PointerOps.front();
PtrN = PointerOps.back();
} else {
Ptr0 = PointerOps[Order.front()];
PtrN = PointerOps[Order.back()];
}
std::optional<int> Diff =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
// Check that the sorted loads are consecutive.
if (static_cast<unsigned>(*Diff) == Sz - 1)
return LoadsState::Vectorize;
      // Simple check whether the distance is a multiple of Sz - 1, i.e. a
      // possible strided access.
bool IsPossibleStrided = *Diff % (Sz - 1) == 0;
// Try to generate strided load node if:
// 1. Target with strided load support is detected.
// 2. The number of loads is greater than MinProfitableStridedLoads,
// or the potential stride <= MaxProfitableLoadStride and the
// potential stride is power-of-2 (to avoid perf regressions for the very
// small number of loads) and max distance > number of loads, or potential
// stride is -1.
// 3. The loads are ordered, or number of unordered loads <=
// MaxProfitableUnorderedLoads, or loads are in reversed order.
// (this check is to avoid extra costs for very expensive shuffles).
if (IsPossibleStrided && (((Sz > MinProfitableStridedLoads ||
(static_cast<unsigned>(std::abs(*Diff)) <=
MaxProfitableLoadStride * Sz &&
isPowerOf2_32(std::abs(*Diff)))) &&
static_cast<unsigned>(std::abs(*Diff)) > Sz) ||
*Diff == -(static_cast<int>(Sz) - 1))) {
int Stride = *Diff / static_cast<int>(Sz - 1);
if (*Diff == Stride * static_cast<int>(Sz - 1)) {
Align Alignment =
cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
->getAlign();
if (TTI->isLegalStridedLoadStore(VecTy, Alignment)) {
// Iterate through all pointers and check if all distances are
// unique multiple of Dist.
SmallSet<int, 4> Dists;
for (Value *Ptr : PointerOps) {
int Dist = 0;
if (Ptr == PtrN)
Dist = *Diff;
else if (Ptr != Ptr0)
Dist =
*getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
// If the strides are not the same or repeated, we can't
// vectorize.
if (((Dist / Stride) * Stride) != Dist ||
!Dists.insert(Dist).second)
break;
}
if (Dists.size() == Sz)
return LoadsState::StridedVectorize;
}
}
}
}
auto CheckForShuffledLoads = [&, &TTI = *TTI](Align CommonAlignment) {
unsigned Sz = DL->getTypeSizeInBits(ScalarTy);
unsigned MinVF = getMinVF(Sz);
unsigned MaxVF = std::max<unsigned>(bit_floor(VL.size() / 2), MinVF);
MaxVF = std::min(getMaximumVF(Sz, Instruction::Load), MaxVF);
for (unsigned VF = MaxVF; VF >= MinVF; VF /= 2) {
unsigned VectorizedCnt = 0;
SmallVector<LoadsState> States;
for (unsigned Cnt = 0, End = VL.size(); Cnt + VF <= End;
Cnt += VF, ++VectorizedCnt) {
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
SmallVector<unsigned> Order;
SmallVector<Value *> PointerOps;
LoadsState LS =
canVectorizeLoads(Slice, Slice.front(), Order, PointerOps,
/*TryRecursiveCheck=*/false);
        // If the slice can only be gathered, stop trying this VF.
if (LS == LoadsState::Gather)
break;
        // If a reorder is needed, consider it a high-cost masked gather for now.
if ((LS == LoadsState::Vectorize ||
LS == LoadsState::StridedVectorize) &&
!Order.empty() && !isReverseOrder(Order))
LS = LoadsState::ScatterVectorize;
States.push_back(LS);
}
      // Can be vectorized later as a series of loads/insertelements.
if (VectorizedCnt == VL.size() / VF) {
        // Compare masked gather cost and loads + insertsubvector costs.
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(
TTI, PointerOps, PointerOps.front(), Instruction::GetElementPtr,
CostKind, ScalarTy, VecTy);
InstructionCost MaskedGatherCost =
TTI.getGatherScatterOpCost(
Instruction::Load, VecTy,
cast<LoadInst>(VL0)->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
InstructionCost VecLdCost = 0;
auto *SubVecTy = getWidenedType(ScalarTy, VF);
for (auto [I, LS] : enumerate(States)) {
auto *LI0 = cast<LoadInst>(VL[I * VF]);
switch (LS) {
case LoadsState::Vectorize: {
auto [ScalarGEPCost, VectorGEPCost] =
getGEPCosts(TTI, ArrayRef(PointerOps).slice(I * VF, VF),
LI0->getPointerOperand(), Instruction::Load,
CostKind, ScalarTy, SubVecTy);
VecLdCost += TTI.getMemoryOpCost(
Instruction::Load, SubVecTy, LI0->getAlign(),
LI0->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
VectorGEPCost - ScalarGEPCost;
break;
}
case LoadsState::StridedVectorize: {
auto [ScalarGEPCost, VectorGEPCost] =
getGEPCosts(TTI, ArrayRef(PointerOps).slice(I * VF, VF),
LI0->getPointerOperand(), Instruction::Load,
CostKind, ScalarTy, SubVecTy);
VecLdCost +=
TTI.getStridedMemoryOpCost(
Instruction::Load, SubVecTy, LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
break;
}
case LoadsState::ScatterVectorize: {
auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(
TTI, ArrayRef(PointerOps).slice(I * VF, VF),
LI0->getPointerOperand(), Instruction::GetElementPtr,
CostKind, ScalarTy, SubVecTy);
VecLdCost +=
TTI.getGatherScatterOpCost(
Instruction::Load, SubVecTy, LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind) +
VectorGEPCost - ScalarGEPCost;
break;
}
case LoadsState::Gather:
llvm_unreachable(
"Expected only consecutive, strided or masked gather loads.");
}
SmallVector<int> ShuffleMask(VL.size());
for (int Idx : seq<int>(0, VL.size()))
ShuffleMask[Idx] = Idx / VF == I ? VL.size() + Idx % VF : Idx;
VecLdCost +=
TTI.getShuffleCost(TTI::SK_InsertSubvector, VecTy, ShuffleMask,
CostKind, I * VF, SubVecTy);
}
        // If the masked gather cost is higher, it is better to vectorize as
        // loads + insertsubvectors, so consider it as a gather node. It will
        // be better estimated later.
if (MaskedGatherCost >= VecLdCost)
return true;
}
}
return false;
};
  // TODO: need to improve analysis of the pointers; if not all of them are
  // GEPs, or they have more than 2 operands, we end up with a gather node,
  // which just increases the cost.
Loop *L = LI->getLoopFor(cast<LoadInst>(VL0)->getParent());
bool ProfitableGatherPointers =
L && Sz > 2 &&
static_cast<unsigned>(count_if(PointerOps, [L](Value *V) {
return L->isLoopInvariant(V);
})) <= Sz / 2;
if (ProfitableGatherPointers || all_of(PointerOps, [IsSorted](Value *P) {
auto *GEP = dyn_cast<GetElementPtrInst>(P);
return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) ||
(GEP && GEP->getNumOperands() == 2 &&
isa<Constant, Instruction>(GEP->getOperand(1)));
})) {
Align CommonAlignment = computeCommonAlignment<LoadInst>(VL);
if (TTI->isLegalMaskedGather(VecTy, CommonAlignment) &&
!TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment)) {
// Check if potential masked gather can be represented as series
// of loads + insertsubvectors.
if (TryRecursiveCheck && CheckForShuffledLoads(CommonAlignment)) {
        // If the masked gather cost is higher, it is better to vectorize as
        // loads + insertsubvectors, so consider it as a gather node. It will
        // be better estimated later.
return LoadsState::Gather;
}
return LoadsState::ScatterVectorize;
}
}
}
return LoadsState::Gather;
}
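// Illustrative examples (sketch, not part of the upstream sources) of the
// resulting states: four i32 loads from adjacent addresses p, p+1, p+2, p+3
// are consecutive (pointer diff == Sz - 1) and give LoadsState::Vectorize;
// loads from p, p+2, p+4, p+6 have a constant stride of 2 and may give
// LoadsState::StridedVectorize if the target supports strided loads;
// unrelated pointers fall back to ScatterVectorize (masked gather) or Gather.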
static bool clusterSortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
const DataLayout &DL, ScalarEvolution &SE,
SmallVectorImpl<unsigned> &SortedIndices) {
assert(llvm::all_of(
VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
"Expected list of pointer operands.");
// Map from bases to a vector of (Ptr, Offset, OrigIdx), which we insert each
// Ptr into, sort and return the sorted indices with values next to one
// another.
MapVector<Value *, SmallVector<std::tuple<Value *, int, unsigned>>> Bases;
Bases[VL[0]].push_back(std::make_tuple(VL[0], 0U, 0U));
unsigned Cnt = 1;
for (Value *Ptr : VL.drop_front()) {
bool Found = any_of(Bases, [&](auto &Base) {
std::optional<int> Diff =
getPointersDiff(ElemTy, Base.first, ElemTy, Ptr, DL, SE,
/*StrictCheck=*/true);
if (!Diff)
return false;
Base.second.emplace_back(Ptr, *Diff, Cnt++);
return true;
});
if (!Found) {
// If we haven't found enough to usefully cluster, return early.
if (Bases.size() > VL.size() / 2 - 1)
return false;
// Not found already - add a new Base
Bases[Ptr].emplace_back(Ptr, 0, Cnt++);
}
}
  // For each of the bases, sort the pointers by Offset and check if any of
  // the bases become consecutive.
bool AnyConsecutive = false;
for (auto &Base : Bases) {
auto &Vec = Base.second;
if (Vec.size() > 1) {
llvm::stable_sort(Vec, [](const std::tuple<Value *, int, unsigned> &X,
const std::tuple<Value *, int, unsigned> &Y) {
return std::get<1>(X) < std::get<1>(Y);
});
int InitialOffset = std::get<1>(Vec[0]);
AnyConsecutive |= all_of(enumerate(Vec), [InitialOffset](const auto &P) {
return std::get<1>(P.value()) == int(P.index()) + InitialOffset;
});
}
}
  // Fill the SortedIndices array only if it looks worthwhile to sort the ptrs.
SortedIndices.clear();
if (!AnyConsecutive)
return false;
for (auto &Base : Bases) {
for (auto &T : Base.second)
SortedIndices.push_back(std::get<2>(T));
}
assert(SortedIndices.size() == VL.size() &&
"Expected SortedIndices to be the size of VL");
return true;
}
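// Worked example (illustrative sketch, not part of the upstream sources):
// for VL = {A, B, A+1, B+1} with unrelated bases A and B, the map groups the
// pointers as A -> {(A,0,0), (A+1,1,2)} and B -> {(B,0,1), (B+1,1,3)}; both
// groups are consecutive after sorting, so SortedIndices becomes
// {0, 2, 1, 3}, i.e. the accesses are clustered per base.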
std::optional<BoUpSLP::OrdersType>
BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) {
assert(TE.isGather() && "Expected gather node only.");
Type *ScalarTy = TE.Scalars[0]->getType();
SmallVector<Value *> Ptrs;
Ptrs.reserve(TE.Scalars.size());
for (Value *V : TE.Scalars) {
auto *L = dyn_cast<LoadInst>(V);
if (!L || !L->isSimple())
return std::nullopt;
Ptrs.push_back(L->getPointerOperand());
}
BoUpSLP::OrdersType Order;
if (clusterSortPtrAccesses(Ptrs, ScalarTy, *DL, *SE, Order))
return std::move(Order);
return std::nullopt;
}
/// Check if two insertelement instructions are from the same buildvector.
static bool areTwoInsertFromSameBuildVector(
InsertElementInst *VU, InsertElementInst *V,
function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
// Instructions must be from the same basic blocks.
if (VU->getParent() != V->getParent())
return false;
// Checks if 2 insertelements are from the same buildvector.
if (VU->getType() != V->getType())
return false;
  // Inserts with multiple uses are separate nodes.
if (!VU->hasOneUse() && !V->hasOneUse())
return false;
auto *IE1 = VU;
auto *IE2 = V;
std::optional<unsigned> Idx1 = getElementIndex(IE1);
std::optional<unsigned> Idx2 = getElementIndex(IE2);
if (Idx1 == std::nullopt || Idx2 == std::nullopt)
return false;
// Go through the vector operand of insertelement instructions trying to find
// either VU as the original vector for IE2 or V as the original vector for
// IE1.
SmallBitVector ReusedIdx(
cast<VectorType>(VU->getType())->getElementCount().getKnownMinValue());
bool IsReusedIdx = false;
do {
if (IE2 == VU && !IE1)
return VU->hasOneUse();
if (IE1 == V && !IE2)
return V->hasOneUse();
if (IE1 && IE1 != V) {
unsigned Idx1 = getElementIndex(IE1).value_or(*Idx2);
IsReusedIdx |= ReusedIdx.test(Idx1);
ReusedIdx.set(Idx1);
if ((IE1 != VU && !IE1->hasOneUse()) || IsReusedIdx)
IE1 = nullptr;
else
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
}
if (IE2 && IE2 != VU) {
unsigned Idx2 = getElementIndex(IE2).value_or(*Idx1);
IsReusedIdx |= ReusedIdx.test(Idx2);
ReusedIdx.set(Idx2);
if ((IE2 != V && !IE2->hasOneUse()) || IsReusedIdx)
IE2 = nullptr;
else
IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
}
} while (!IsReusedIdx && (IE1 || IE2));
return false;
}
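// For example (illustrative only, assuming a simple two-element chain):
//   %v0 = insertelement <2 x i32> poison, i32 %a, i32 0
//   %v1 = insertelement <2 x i32> %v0, i32 %b, i32 1
// the walk over GetBaseOperand reaches %v0 from %v1, the indices 0 and 1 are
// distinct, so the two inserts belong to the same buildvector.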
std::optional<BoUpSLP::OrdersType>
BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
// FIXME: Vectorizing is not supported yet for non-power-of-2 ops.
if (TE.isNonPowOf2Vec())
return std::nullopt;
  // No need to reorder if we have to shuffle reuses anyway; the node still
  // needs to be shuffled.
if (!TE.ReuseShuffleIndices.empty()) {
if (isSplat(TE.Scalars))
return std::nullopt;
// Check if reuse shuffle indices can be improved by reordering.
    // For this, check that the reuse mask is "clustered", i.e. each scalar
    // value is used once in each submask of size <number_of_scalars>.
// Example: 4 scalar values.
// ReuseShuffleIndices mask: 0, 1, 2, 3, 3, 2, 0, 1 - clustered.
// 0, 1, 2, 3, 3, 3, 1, 0 - not clustered, because
// element 3 is used twice in the second submask.
unsigned Sz = TE.Scalars.size();
if (TE.isGather()) {
if (std::optional<OrdersType> CurrentOrder =
findReusedOrderedScalars(TE)) {
SmallVector<int> Mask;
fixupOrderingIndices(*CurrentOrder);
inversePermutation(*CurrentOrder, Mask);
::addMask(Mask, TE.ReuseShuffleIndices);
OrdersType Res(TE.getVectorFactor(), TE.getVectorFactor());
unsigned Sz = TE.Scalars.size();
for (int K = 0, E = TE.getVectorFactor() / Sz; K < E; ++K) {
for (auto [I, Idx] : enumerate(ArrayRef(Mask).slice(K * Sz, Sz)))
if (Idx != PoisonMaskElem)
Res[Idx + K * Sz] = I + K * Sz;
}
return std::move(Res);
}
}
if (Sz == 2 && TE.getVectorFactor() == 4 &&
TTI->getNumberOfParts(getWidenedType(TE.Scalars.front()->getType(),
2 * TE.getVectorFactor())) == 1)
return std::nullopt;
if (!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices,
Sz)) {
SmallVector<int> ReorderMask(Sz, PoisonMaskElem);
if (TE.ReorderIndices.empty())
std::iota(ReorderMask.begin(), ReorderMask.end(), 0);
else
inversePermutation(TE.ReorderIndices, ReorderMask);
::addMask(ReorderMask, TE.ReuseShuffleIndices);
unsigned VF = ReorderMask.size();
OrdersType ResOrder(VF, VF);
unsigned NumParts = divideCeil(VF, Sz);
SmallBitVector UsedVals(NumParts);
for (unsigned I = 0; I < VF; I += Sz) {
int Val = PoisonMaskElem;
unsigned UndefCnt = 0;
unsigned Limit = std::min(Sz, VF - I);
if (any_of(ArrayRef(ReorderMask).slice(I, Limit),
[&](int Idx) {
if (Val == PoisonMaskElem && Idx != PoisonMaskElem)
Val = Idx;
if (Idx == PoisonMaskElem)
++UndefCnt;
return Idx != PoisonMaskElem && Idx != Val;
}) ||
Val >= static_cast<int>(NumParts) || UsedVals.test(Val) ||
UndefCnt > Sz / 2)
return std::nullopt;
UsedVals.set(Val);
for (unsigned K = 0; K < NumParts; ++K)
ResOrder[Val + Sz * K] = I + K;
}
return std::move(ResOrder);
}
unsigned VF = TE.getVectorFactor();
// Try build correct order for extractelement instructions.
SmallVector<int> ReusedMask(TE.ReuseShuffleIndices.begin(),
TE.ReuseShuffleIndices.end());
if (TE.getOpcode() == Instruction::ExtractElement && !TE.isAltShuffle() &&
all_of(TE.Scalars, [Sz](Value *V) {
std::optional<unsigned> Idx = getExtractIndex(cast<Instruction>(V));
return Idx && *Idx < Sz;
})) {
SmallVector<int> ReorderMask(Sz, PoisonMaskElem);
if (TE.ReorderIndices.empty())
std::iota(ReorderMask.begin(), ReorderMask.end(), 0);
else
inversePermutation(TE.ReorderIndices, ReorderMask);
for (unsigned I = 0; I < VF; ++I) {
int &Idx = ReusedMask[I];
if (Idx == PoisonMaskElem)
continue;
Value *V = TE.Scalars[ReorderMask[Idx]];
std::optional<unsigned> EI = getExtractIndex(cast<Instruction>(V));
Idx = std::distance(ReorderMask.begin(), find(ReorderMask, *EI));
}
}
    // Build the order of VF size; the reuses shuffles need to be reordered,
    // as they are always of VF size.
OrdersType ResOrder(VF);
std::iota(ResOrder.begin(), ResOrder.end(), 0);
auto *It = ResOrder.begin();
for (unsigned K = 0; K < VF; K += Sz) {
OrdersType CurrentOrder(TE.ReorderIndices);
SmallVector<int> SubMask{ArrayRef(ReusedMask).slice(K, Sz)};
if (SubMask.front() == PoisonMaskElem)
std::iota(SubMask.begin(), SubMask.end(), 0);
reorderOrder(CurrentOrder, SubMask);
transform(CurrentOrder, It, [K](unsigned Pos) { return Pos + K; });
std::advance(It, Sz);
}
if (TE.isGather() && all_of(enumerate(ResOrder), [](const auto &Data) {
return Data.index() == Data.value();
}))
return std::nullopt; // No need to reorder.
return std::move(ResOrder);
}
if (TE.State == TreeEntry::StridedVectorize && !TopToBottom &&
any_of(TE.UserTreeIndices,
[](const EdgeInfo &EI) {
return !Instruction::isBinaryOp(EI.UserTE->getOpcode());
}) &&
(TE.ReorderIndices.empty() || isReverseOrder(TE.ReorderIndices)))
return std::nullopt;
if ((TE.State == TreeEntry::Vectorize ||
TE.State == TreeEntry::StridedVectorize) &&
(isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE.getMainOp()) ||
(TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&
!TE.isAltShuffle())
return TE.ReorderIndices;
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
auto PHICompare = [&](unsigned I1, unsigned I2) {
Value *V1 = TE.Scalars[I1];
Value *V2 = TE.Scalars[I2];
if (V1 == V2 || (V1->getNumUses() == 0 && V2->getNumUses() == 0))
return false;
if (V1->getNumUses() < V2->getNumUses())
return true;
if (V1->getNumUses() > V2->getNumUses())
return false;
auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
auto *FirstUserOfPhi2 = cast<Instruction>(*V2->user_begin());
if (auto *IE1 = dyn_cast<InsertElementInst>(FirstUserOfPhi1))
if (auto *IE2 = dyn_cast<InsertElementInst>(FirstUserOfPhi2)) {
if (!areTwoInsertFromSameBuildVector(
IE1, IE2,
[](InsertElementInst *II) { return II->getOperand(0); }))
return I1 < I2;
return getElementIndex(IE1) < getElementIndex(IE2);
}
if (auto *EE1 = dyn_cast<ExtractElementInst>(FirstUserOfPhi1))
if (auto *EE2 = dyn_cast<ExtractElementInst>(FirstUserOfPhi2)) {
if (EE1->getOperand(0) != EE2->getOperand(0))
return I1 < I2;
return getElementIndex(EE1) < getElementIndex(EE2);
}
return I1 < I2;
};
auto IsIdentityOrder = [](const OrdersType &Order) {
for (unsigned Idx : seq<unsigned>(0, Order.size()))
if (Idx != Order[Idx])
return false;
return true;
};
if (!TE.ReorderIndices.empty())
return TE.ReorderIndices;
DenseMap<unsigned, unsigned> PhiToId;
SmallVector<unsigned> Phis(TE.Scalars.size());
std::iota(Phis.begin(), Phis.end(), 0);
OrdersType ResOrder(TE.Scalars.size());
for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id)
PhiToId[Id] = Id;
stable_sort(Phis, PHICompare);
for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id)
ResOrder[Id] = PhiToId[Phis[Id]];
if (IsIdentityOrder(ResOrder))
return std::nullopt; // No need to reorder.
return std::move(ResOrder);
}
if (TE.isGather() && !TE.isAltShuffle() && allSameType(TE.Scalars)) {
// TODO: add analysis of other gather nodes with extractelement
// instructions and other values/instructions, not only undefs.
if ((TE.getOpcode() == Instruction::ExtractElement ||
(all_of(TE.Scalars, IsaPred<UndefValue, ExtractElementInst>) &&
any_of(TE.Scalars, IsaPred<ExtractElementInst>))) &&
all_of(TE.Scalars, [](Value *V) {
auto *EE = dyn_cast<ExtractElementInst>(V);
return !EE || isa<FixedVectorType>(EE->getVectorOperandType());
})) {
// Check that gather of extractelements can be represented as
// just a shuffle of a single vector.
OrdersType CurrentOrder;
bool Reuse = canReuseExtract(TE.Scalars, TE.getMainOp(), CurrentOrder,
/*ResizeAllowed=*/true);
if (Reuse || !CurrentOrder.empty())
return std::move(CurrentOrder);
}
// If the gather node is <undef, v, .., poison> and
// insertelement poison, v, 0 [+ permute]
// is cheaper than
// insertelement poison, v, n - try to reorder.
// If rotating the whole graph, exclude the permute cost, the whole graph
// might be transformed.
int Sz = TE.Scalars.size();
if (isSplat(TE.Scalars) && !allConstant(TE.Scalars) &&
count_if(TE.Scalars, IsaPred<UndefValue>) == Sz - 1) {
const auto *It =
find_if(TE.Scalars, [](Value *V) { return !isConstant(V); });
if (It == TE.Scalars.begin())
return OrdersType();
auto *Ty = getWidenedType(TE.Scalars.front()->getType(), Sz);
if (It != TE.Scalars.end()) {
OrdersType Order(Sz, Sz);
unsigned Idx = std::distance(TE.Scalars.begin(), It);
Order[Idx] = 0;
fixupOrderingIndices(Order);
SmallVector<int> Mask;
inversePermutation(Order, Mask);
InstructionCost PermuteCost =
TopToBottom
? 0
: TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, Mask);
InstructionCost InsertFirstCost = TTI->getVectorInstrCost(
Instruction::InsertElement, Ty, TTI::TCK_RecipThroughput, 0,
PoisonValue::get(Ty), *It);
InstructionCost InsertIdxCost = TTI->getVectorInstrCost(
Instruction::InsertElement, Ty, TTI::TCK_RecipThroughput, Idx,
PoisonValue::get(Ty), *It);
if (InsertFirstCost + PermuteCost < InsertIdxCost) {
OrdersType Order(Sz, Sz);
Order[Idx] = 0;
return std::move(Order);
}
}
}
if (isSplat(TE.Scalars))
return std::nullopt;
if (TE.Scalars.size() >= 4)
if (std::optional<OrdersType> Order = findPartiallyOrderedLoads(TE))
return Order;
if (std::optional<OrdersType> CurrentOrder = findReusedOrderedScalars(TE))
return CurrentOrder;
}
return std::nullopt;
}
/// Checks if the given mask is a "clustered" mask with the same clusters of
/// size \p Sz, which are not identity submasks.
static bool isRepeatedNonIdentityClusteredMask(ArrayRef<int> Mask,
unsigned Sz) {
ArrayRef<int> FirstCluster = Mask.slice(0, Sz);
if (ShuffleVectorInst::isIdentityMask(FirstCluster, Sz))
return false;
for (unsigned I = Sz, E = Mask.size(); I < E; I += Sz) {
ArrayRef<int> Cluster = Mask.slice(I, Sz);
if (Cluster != FirstCluster)
return false;
}
return true;
}
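// For example (illustrative only): with Sz == 2, the mask {1, 0, 1, 0} is a
// repeated non-identity cluster and returns true, while {0, 1, 0, 1} starts
// with an identity submask and returns false.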
void BoUpSLP::reorderNodeWithReuses(TreeEntry &TE, ArrayRef<int> Mask) const {
// Reorder reuses mask.
reorderReuses(TE.ReuseShuffleIndices, Mask);
const unsigned Sz = TE.Scalars.size();
  // For vectorized nodes and non-clustered reuses no need to do anything else.
if (!TE.isGather() ||
!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices,
Sz) ||
!isRepeatedNonIdentityClusteredMask(TE.ReuseShuffleIndices, Sz))
return;
SmallVector<int> NewMask;
inversePermutation(TE.ReorderIndices, NewMask);
addMask(NewMask, TE.ReuseShuffleIndices);
// Clear reorder since it is going to be applied to the new mask.
TE.ReorderIndices.clear();
// Try to improve gathered nodes with clustered reuses, if possible.
ArrayRef<int> Slice = ArrayRef(NewMask).slice(0, Sz);
SmallVector<unsigned> NewOrder(Slice.begin(), Slice.end());
inversePermutation(NewOrder, NewMask);
reorderScalars(TE.Scalars, NewMask);
// Fill the reuses mask with the identity submasks.
for (auto *It = TE.ReuseShuffleIndices.begin(),
*End = TE.ReuseShuffleIndices.end();
It != End; std::advance(It, Sz))
std::iota(It, std::next(It, Sz), 0);
}
static void combineOrders(MutableArrayRef<unsigned> Order,
ArrayRef<unsigned> SecondaryOrder) {
assert((SecondaryOrder.empty() || Order.size() == SecondaryOrder.size()) &&
"Expected same size of orders");
unsigned Sz = Order.size();
SmallBitVector UsedIndices(Sz);
for (unsigned Idx : seq<unsigned>(0, Sz)) {
if (Order[Idx] != Sz)
UsedIndices.set(Order[Idx]);
}
if (SecondaryOrder.empty()) {
for (unsigned Idx : seq<unsigned>(0, Sz))
if (Order[Idx] == Sz && !UsedIndices.test(Idx))
Order[Idx] = Idx;
} else {
for (unsigned Idx : seq<unsigned>(0, Sz))
if (SecondaryOrder[Idx] != Sz && Order[Idx] == Sz &&
!UsedIndices.test(SecondaryOrder[Idx]))
Order[Idx] = SecondaryOrder[Idx];
}
}
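// Worked example (illustrative sketch, not part of the upstream sources):
// with Sz == 4, Order = {2, 4, 4, 0} (4 meaning "unset") and
// SecondaryOrder = {2, 1, 3, 0}, the unset slots are filled from the
// secondary order wherever the index is still unused, giving {2, 1, 3, 0}.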
void BoUpSLP::reorderTopToBottom() {
// Maps VF to the graph nodes.
DenseMap<unsigned, SetVector<TreeEntry *>> VFToOrderedEntries;
// ExtractElement gather nodes which can be vectorized and need to handle
// their ordering.
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
  // Phi nodes can have a preferred ordering based on their result users.
DenseMap<const TreeEntry *, OrdersType> PhisToOrders;
// AltShuffles can also have a preferred ordering that leads to fewer
// instructions, e.g., the addsub instruction in x86.
DenseMap<const TreeEntry *, OrdersType> AltShufflesToOrders;
// Maps a TreeEntry to the reorder indices of external users.
DenseMap<const TreeEntry *, SmallVector<OrdersType, 1>>
ExternalUserReorderMap;
// Find all reorderable nodes with the given VF.
  // Currently these are vectorized stores, loads, extracts + some gathering
  // of extracts.
for_each(VectorizableTree, [&, &TTIRef = *TTI](
const std::unique_ptr<TreeEntry> &TE) {
// Look for external users that will probably be vectorized.
SmallVector<OrdersType, 1> ExternalUserReorderIndices =
findExternalStoreUsersReorderIndices(TE.get());
if (!ExternalUserReorderIndices.empty()) {
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
ExternalUserReorderMap.try_emplace(TE.get(),
std::move(ExternalUserReorderIndices));
}
// Patterns like [fadd,fsub] can be combined into a single instruction in
// x86. Reordering them into [fsub,fadd] blocks this pattern. So we need
// to take into account their order when looking for the most used order.
if (TE->isAltShuffle()) {
VectorType *VecTy =
getWidenedType(TE->Scalars[0]->getType(), TE->Scalars.size());
unsigned Opcode0 = TE->getOpcode();
unsigned Opcode1 = TE->getAltOpcode();
SmallBitVector OpcodeMask(getAltInstrMask(TE->Scalars, Opcode0, Opcode1));
// If this pattern is supported by the target then we consider the order.
if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
AltShufflesToOrders.try_emplace(TE.get(), OrdersType());
}
// TODO: Check the reverse order too.
}
if (std::optional<OrdersType> CurrentOrder =
getReorderingData(*TE, /*TopToBottom=*/true)) {
      // Do not include ordering for nodes used in the alt opcode
      // vectorization; it is better to reorder them during the bottom-to-top
      // stage. Following the order here causes reordering of the whole graph,
      // though actually it is profitable just to reorder the subgraph that
      // starts from the alternate opcode vectorization node. Such nodes
      // already end up with a shuffle instruction, and it is enough to change
      // this shuffle rather than rotate the scalars of the whole graph.
unsigned Cnt = 0;
const TreeEntry *UserTE = TE.get();
while (UserTE && Cnt < RecursionMaxDepth) {
if (UserTE->UserTreeIndices.size() != 1)
break;
if (all_of(UserTE->UserTreeIndices, [](const EdgeInfo &EI) {
return EI.UserTE->State == TreeEntry::Vectorize &&
EI.UserTE->isAltShuffle() && EI.UserTE->Idx != 0;
}))
return;
UserTE = UserTE->UserTreeIndices.back().UserTE;
++Cnt;
}
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
if (!(TE->State == TreeEntry::Vectorize ||
TE->State == TreeEntry::StridedVectorize) ||
!TE->ReuseShuffleIndices.empty())
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
if (TE->State == TreeEntry::Vectorize &&
TE->getOpcode() == Instruction::PHI)
PhisToOrders.try_emplace(TE.get(), *CurrentOrder);
}
});
// Reorder the graph nodes according to their vectorization factor.
for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
VF /= 2) {
auto It = VFToOrderedEntries.find(VF);
if (It == VFToOrderedEntries.end())
continue;
    // Try to find the most profitable order. We are just looking for the
    // most used order and reorder scalar elements in the nodes according to
    // this most used order.
ArrayRef<TreeEntry *> OrderedEntries = It->second.getArrayRef();
// All operands are reordered and used only in this node - propagate the
// most used order to the user node.
MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;
SmallPtrSet<const TreeEntry *, 4> VisitedOps;
for (const TreeEntry *OpTE : OrderedEntries) {
      // No need to reorder these nodes; they still need to be extended and
      // shuffled, just merge the reordering shuffle and the reuse shuffle.
if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))
continue;
// Count number of orders uses.
const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders,
&PhisToOrders]() -> const OrdersType & {
if (OpTE->isGather() || !OpTE->ReuseShuffleIndices.empty()) {
auto It = GathersToOrders.find(OpTE);
if (It != GathersToOrders.end())
return It->second;
}
if (OpTE->isAltShuffle()) {
auto It = AltShufflesToOrders.find(OpTE);
if (It != AltShufflesToOrders.end())
return It->second;
}
if (OpTE->State == TreeEntry::Vectorize &&
OpTE->getOpcode() == Instruction::PHI) {
auto It = PhisToOrders.find(OpTE);
if (It != PhisToOrders.end())
return It->second;
}
return OpTE->ReorderIndices;
}();
// First consider the order of the external scalar users.
auto It = ExternalUserReorderMap.find(OpTE);
if (It != ExternalUserReorderMap.end()) {
const auto &ExternalUserReorderIndices = It->second;
        // If the OpTE vector factor != number of scalars, use the natural
        // order; it is an attempt to reorder a node with reused scalars but
        // with external uses.
if (OpTE->getVectorFactor() != OpTE->Scalars.size()) {
OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
ExternalUserReorderIndices.size();
} else {
for (const OrdersType &ExtOrder : ExternalUserReorderIndices)
++OrdersUses.insert(std::make_pair(ExtOrder, 0)).first->second;
}
// No other useful reorder data in this entry.
if (Order.empty())
continue;
}
      // Stores actually store the mask, not the order; we need to invert it.
if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
SmallVector<int> Mask;
inversePermutation(Order, Mask);
unsigned E = Order.size();
OrdersType CurrentOrder(E, E);
transform(Mask, CurrentOrder.begin(), [E](int Idx) {
return Idx == PoisonMaskElem ? E : static_cast<unsigned>(Idx);
});
fixupOrderingIndices(CurrentOrder);
++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
} else {
++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
}
}
if (OrdersUses.empty())
continue;
auto IsIdentityOrder = [](ArrayRef<unsigned> Order) {
const unsigned Sz = Order.size();
for (unsigned Idx : seq<unsigned>(0, Sz))
if (Idx != Order[Idx] && Order[Idx] != Sz)
return false;
return true;
};
// Choose the most used order.
unsigned IdentityCnt = 0;
unsigned FilledIdentityCnt = 0;
OrdersType IdentityOrder(VF, VF);
for (auto &Pair : OrdersUses) {
if (Pair.first.empty() || IsIdentityOrder(Pair.first)) {
if (!Pair.first.empty())
FilledIdentityCnt += Pair.second;
IdentityCnt += Pair.second;
combineOrders(IdentityOrder, Pair.first);
}
}
MutableArrayRef<unsigned> BestOrder = IdentityOrder;
unsigned Cnt = IdentityCnt;
for (auto &Pair : OrdersUses) {
      // Prefer the identity order. But if a filled identity (non-empty order)
      // was found with the same number of uses as the new candidate order, we
      // can choose this candidate order instead.
if (Cnt < Pair.second ||
(Cnt == IdentityCnt && IdentityCnt == FilledIdentityCnt &&
Cnt == Pair.second && !BestOrder.empty() &&
IsIdentityOrder(BestOrder))) {
combineOrders(Pair.first, BestOrder);
BestOrder = Pair.first;
Cnt = Pair.second;
} else {
combineOrders(BestOrder, Pair.first);
}
}
// Set order of the user node.
if (IsIdentityOrder(BestOrder))
continue;
fixupOrderingIndices(BestOrder);
SmallVector<int> Mask;
inversePermutation(BestOrder, Mask);
SmallVector<int> MaskOrder(BestOrder.size(), PoisonMaskElem);
unsigned E = BestOrder.size();
transform(BestOrder, MaskOrder.begin(), [E](unsigned I) {
return I < E ? static_cast<int>(I) : PoisonMaskElem;
});
// Do an actual reordering, if profitable.
for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
// Just do the reordering for the nodes with the given VF.
if (TE->Scalars.size() != VF) {
if (TE->ReuseShuffleIndices.size() == VF) {
// Need to reorder the reuses masks of the operands with smaller VF to
// be able to find the match between the graph nodes and scalar
// operands of the given node during vectorization/cost estimation.
assert(all_of(TE->UserTreeIndices,
[VF, &TE](const EdgeInfo &EI) {
return EI.UserTE->Scalars.size() == VF ||
EI.UserTE->Scalars.size() ==
TE->Scalars.size();
}) &&
"All users must be of VF size.");
// Update ordering of the operands with the smaller VF than the given
// one.
reorderNodeWithReuses(*TE, Mask);
}
continue;
}
if ((TE->State == TreeEntry::Vectorize ||
TE->State == TreeEntry::StridedVectorize) &&
isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst,
InsertElementInst>(TE->getMainOp()) &&
!TE->isAltShuffle()) {
// Build correct orders for extract{element,value}, loads and
// stores.
reorderOrder(TE->ReorderIndices, Mask);
if (isa<InsertElementInst, StoreInst>(TE->getMainOp()))
TE->reorderOperands(Mask);
} else {
// Reorder the node and its operands.
TE->reorderOperands(Mask);
assert(TE->ReorderIndices.empty() &&
"Expected empty reorder sequence.");
reorderScalars(TE->Scalars, Mask);
}
if (!TE->ReuseShuffleIndices.empty()) {
// Apply reversed order to keep the original ordering of the reused
// elements to avoid extra reorder indices shuffling.
OrdersType CurrentOrder;
reorderOrder(CurrentOrder, MaskOrder);
SmallVector<int> NewReuses;
inversePermutation(CurrentOrder, NewReuses);
addMask(NewReuses, TE->ReuseShuffleIndices);
TE->ReuseShuffleIndices.swap(NewReuses);
}
}
}
}
bool BoUpSLP::canReorderOperands(
TreeEntry *UserTE, SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
ArrayRef<TreeEntry *> ReorderableGathers,
SmallVectorImpl<TreeEntry *> &GatherOps) {
// FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
if (UserTE->isNonPowOf2Vec())
return false;
for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) {
if (any_of(Edges, [I](const std::pair<unsigned, TreeEntry *> &OpData) {
return OpData.first == I &&
(OpData.second->State == TreeEntry::Vectorize ||
OpData.second->State == TreeEntry::StridedVectorize);
}))
continue;
if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) {
// Do not reorder if operand node is used by many user nodes.
if (any_of(TE->UserTreeIndices,
[UserTE](const EdgeInfo &EI) { return EI.UserTE != UserTE; }))
return false;
// Add the node to the list of the ordered nodes with the identity
// order.
Edges.emplace_back(I, TE);
// Add ScatterVectorize nodes to the list of operands, where just
// reordering of the scalars is required. Similar to the gathers, so
// simply add to the list of gathered ops.
// If there are reused scalars, process this node as a regular vectorize
// node, just reorder reuses mask.
if (TE->State != TreeEntry::Vectorize &&
TE->State != TreeEntry::StridedVectorize &&
TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty())
GatherOps.push_back(TE);
continue;
}
TreeEntry *Gather = nullptr;
if (count_if(ReorderableGathers,
[&Gather, UserTE, I](TreeEntry *TE) {
assert(TE->State != TreeEntry::Vectorize &&
TE->State != TreeEntry::StridedVectorize &&
"Only non-vectorized nodes are expected.");
if (any_of(TE->UserTreeIndices,
[UserTE, I](const EdgeInfo &EI) {
return EI.UserTE == UserTE && EI.EdgeIdx == I;
})) {
assert(TE->isSame(UserTE->getOperand(I)) &&
"Operand entry does not match operands.");
Gather = TE;
return true;
}
return false;
}) > 1 &&
!allConstant(UserTE->getOperand(I)))
return false;
if (Gather)
GatherOps.push_back(Gather);
}
return true;
}
void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
SetVector<TreeEntry *> OrderedEntries;
DenseSet<const TreeEntry *> GathersToOrders;
// Find all reorderable leaf nodes with the given VF.
// Currently these are vectorized loads and extracts without alternate
// operands, plus some gathering of extracts.
SmallVector<TreeEntry *> NonVectorized;
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
if (TE->State != TreeEntry::Vectorize &&
TE->State != TreeEntry::StridedVectorize)
NonVectorized.push_back(TE.get());
if (std::optional<OrdersType> CurrentOrder =
getReorderingData(*TE, /*TopToBottom=*/false)) {
OrderedEntries.insert(TE.get());
if (!(TE->State == TreeEntry::Vectorize ||
TE->State == TreeEntry::StridedVectorize) ||
!TE->ReuseShuffleIndices.empty())
GathersToOrders.insert(TE.get());
}
}
// 1. Propagate the order to the graph nodes that use only reordered nodes.
// I.e., if a node has operands that are reordered, try to keep at least one
// operand in the natural order, reorder the others, and reorder the user
// node itself.
SmallPtrSet<const TreeEntry *, 4> Visited;
while (!OrderedEntries.empty()) {
// 1. Filter out only reordered nodes.
// 2. If the entry has multiple uses - skip it and jump to the next node.
DenseMap<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>> Users;
SmallVector<TreeEntry *> Filtered;
for (TreeEntry *TE : OrderedEntries) {
if (!(TE->State == TreeEntry::Vectorize ||
TE->State == TreeEntry::StridedVectorize ||
(TE->isGather() && GathersToOrders.contains(TE))) ||
TE->UserTreeIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
!all_of(drop_begin(TE->UserTreeIndices),
[TE](const EdgeInfo &EI) {
return EI.UserTE == TE->UserTreeIndices.front().UserTE;
}) ||
!Visited.insert(TE).second) {
Filtered.push_back(TE);
continue;
}
// Build a map between user nodes and their operand orders to speed up the
// search. The graph currently does not provide this dependency directly.
for (EdgeInfo &EI : TE->UserTreeIndices) {
TreeEntry *UserTE = EI.UserTE;
auto It = Users.find(UserTE);
if (It == Users.end())
It = Users.insert({UserTE, {}}).first;
It->second.emplace_back(EI.EdgeIdx, TE);
}
}
// Erase filtered entries.
for (TreeEntry *TE : Filtered)
OrderedEntries.remove(TE);
SmallVector<
std::pair<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>>>
UsersVec(Users.begin(), Users.end());
sort(UsersVec, [](const auto &Data1, const auto &Data2) {
return Data1.first->Idx > Data2.first->Idx;
});
for (auto &Data : UsersVec) {
// Check that operands are used only in the User node.
SmallVector<TreeEntry *> GatherOps;
if (!canReorderOperands(Data.first, Data.second, NonVectorized,
GatherOps)) {
for (const std::pair<unsigned, TreeEntry *> &Op : Data.second)
OrderedEntries.remove(Op.second);
continue;
}
// All operands are reordered and used only in this node - propagate the
// most used order to the user node.
MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;
// Do the analysis for each tree entry only once; otherwise the order of
// the same node may be considered several times, even though it might not
// be profitable.
SmallPtrSet<const TreeEntry *, 4> VisitedOps;
SmallPtrSet<const TreeEntry *, 4> VisitedUsers;
for (const auto &Op : Data.second) {
TreeEntry *OpTE = Op.second;
if (!VisitedOps.insert(OpTE).second)
continue;
if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE))
continue;
const auto Order = [&]() -> const OrdersType {
if (OpTE->isGather() || !OpTE->ReuseShuffleIndices.empty())
return getReorderingData(*OpTE, /*TopToBottom=*/false)
.value_or(OrdersType(1));
return OpTE->ReorderIndices;
}();
// An order of size 1 is only a placeholder with no real reordering data,
// so skip it in favor of fully specified orders.
if (Order.size() == 1)
continue;
unsigned NumOps = count_if(
Data.second, [OpTE](const std::pair<unsigned, TreeEntry *> &P) {
return P.second == OpTE;
});
// Stores actually record the mask, not the order, so we need to invert it.
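// Illustrative example (hypothetical values): a stored mask of {1, 2, 0}
// inverts to Mask = {2, 0, 1}, which the transform below turns back into
// the order CurrentOrder = {2, 0, 1}.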
if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
SmallVector<int> Mask;
inversePermutation(Order, Mask);
unsigned E = Order.size();
OrdersType CurrentOrder(E, E);
transform(Mask, CurrentOrder.begin(), [E](int Idx) {
return Idx == PoisonMaskElem ? E : static_cast<unsigned>(Idx);
});
fixupOrderingIndices(CurrentOrder);
OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second +=
NumOps;
} else {
OrdersUses.insert(std::make_pair(Order, 0)).first->second += NumOps;
}
auto Res = OrdersUses.insert(std::make_pair(OrdersType(), 0));
const auto AllowsReordering = [&](const TreeEntry *TE) {
// FIXME: Reordering isn't implemented for non-power-of-2 nodes yet.
if (TE->isNonPowOf2Vec())
return false;
if (!TE->ReorderIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
(TE->State == TreeEntry::Vectorize && TE->isAltShuffle()) ||
(IgnoreReorder && TE->Idx == 0))
return true;
if (TE->isGather()) {
if (GathersToOrders.contains(TE))
return !getReorderingData(*TE, /*TopToBottom=*/false)
.value_or(OrdersType(1))
.empty();
return true;
}
return false;
};
for (const EdgeInfo &EI : OpTE->UserTreeIndices) {
TreeEntry *UserTE = EI.UserTE;
if (!VisitedUsers.insert(UserTE).second)
continue;
// The user node may be reordered if it requires reordering, has reused
// scalars, is an alternate-op vectorize node, or its operand nodes require
// reordering.
if (AllowsReordering(UserTE))
continue;
// Check if users allow reordering.
// Currently look up just 1 level of operands to avoid increase of
// the compile time.
// Reordering is profitable only if clearly more operands allow
// reordering than prefer the natural order.
ArrayRef<std::pair<unsigned, TreeEntry *>> Ops = Users[UserTE];
if (static_cast<unsigned>(count_if(
Ops, [UserTE, &AllowsReordering](
const std::pair<unsigned, TreeEntry *> &Op) {
return AllowsReordering(Op.second) &&
all_of(Op.second->UserTreeIndices,
[UserTE](const EdgeInfo &EI) {
return EI.UserTE == UserTE;
});
})) <= Ops.size() / 2)
++Res.first->second;
}
}
if (OrdersUses.empty()) {
for (const std::pair<unsigned, TreeEntry *> &Op : Data.second)
OrderedEntries.remove(Op.second);
continue;
}
auto IsIdentityOrder = [](ArrayRef<unsigned> Order) {
const unsigned Sz = Order.size();
for (unsigned Idx : seq<unsigned>(0, Sz))
if (Idx != Order[Idx] && Order[Idx] != Sz)
return false;
return true;
};
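// E.g. (hypothetical values) with Sz = 4, both {0, 1, 2, 3} and
// {0, 4, 2, 3} count as identity orders here: the sentinel value Sz acts
// as a "don't care" slot.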
// Choose the most used order.
unsigned IdentityCnt = 0;
unsigned VF = Data.second.front().second->getVectorFactor();
OrdersType IdentityOrder(VF, VF);
for (auto &Pair : OrdersUses) {
if (Pair.first.empty() || IsIdentityOrder(Pair.first)) {
IdentityCnt += Pair.second;
combineOrders(IdentityOrder, Pair.first);
}
}
MutableArrayRef<unsigned> BestOrder = IdentityOrder;
unsigned Cnt = IdentityCnt;
for (auto &Pair : OrdersUses) {
// Prefer the identity order. But if a filled identity order (a non-empty
// order) is found with the same number of uses as the new candidate order,
// the candidate order may be chosen instead.
if (Cnt < Pair.second) {
combineOrders(Pair.first, BestOrder);
BestOrder = Pair.first;
Cnt = Pair.second;
} else {
combineOrders(BestOrder, Pair.first);
}
}
// Set order of the user node.
if (IsIdentityOrder(BestOrder)) {
for (const std::pair<unsigned, TreeEntry *> &Op : Data.second)
OrderedEntries.remove(Op.second);
continue;
}
fixupOrderingIndices(BestOrder);
// Erase operands from OrderedEntries list and adjust their orders.
VisitedOps.clear();
SmallVector<int> Mask;
inversePermutation(BestOrder, Mask);
SmallVector<int> MaskOrder(BestOrder.size(), PoisonMaskElem);
unsigned E = BestOrder.size();
transform(BestOrder, MaskOrder.begin(), [E](unsigned I) {
return I < E ? static_cast<int>(I) : PoisonMaskElem;
});
for (const std::pair<unsigned, TreeEntry *> &Op : Data.second) {
TreeEntry *TE = Op.second;
OrderedEntries.remove(TE);
if (!VisitedOps.insert(TE).second)
continue;
if (TE->ReuseShuffleIndices.size() == BestOrder.size()) {
reorderNodeWithReuses(*TE, Mask);
continue;
}
// Gathers are processed separately.
if (TE->State != TreeEntry::Vectorize &&
TE->State != TreeEntry::StridedVectorize &&
(TE->State != TreeEntry::ScatterVectorize ||
TE->ReorderIndices.empty()))
continue;
assert((BestOrder.size() == TE->ReorderIndices.size() ||
TE->ReorderIndices.empty()) &&
"Non-matching sizes of user/operand entries.");
reorderOrder(TE->ReorderIndices, Mask);
if (IgnoreReorder && TE == VectorizableTree.front().get())
IgnoreReorder = false;
}
// For gathers, we just need to reorder their scalars.
for (TreeEntry *Gather : GatherOps) {
assert(Gather->ReorderIndices.empty() &&
"Unexpected reordering of gathers.");
if (!Gather->ReuseShuffleIndices.empty()) {
// Just reorder reuses indices.
reorderReuses(Gather->ReuseShuffleIndices, Mask);
continue;
}
reorderScalars(Gather->Scalars, Mask);
OrderedEntries.remove(Gather);
}
// Reorder operands of the user node and set the ordering for the user
// node itself.
if (Data.first->State != TreeEntry::Vectorize ||
!isa<ExtractElementInst, ExtractValueInst, LoadInst>(
Data.first->getMainOp()) ||
Data.first->isAltShuffle())
Data.first->reorderOperands(Mask);
if (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) ||
Data.first->isAltShuffle() ||
Data.first->State == TreeEntry::StridedVectorize) {
reorderScalars(Data.first->Scalars, Mask);
reorderOrder(Data.first->ReorderIndices, MaskOrder,
/*BottomOrder=*/true);
if (Data.first->ReuseShuffleIndices.empty() &&
!Data.first->ReorderIndices.empty() &&
!Data.first->isAltShuffle()) {
// Insert the user node into the list to try to sink the reordering deeper
// into the graph.
OrderedEntries.insert(Data.first);
}
} else {
reorderOrder(Data.first->ReorderIndices, Mask);
}
}
}
// If the reordering is unnecessary, just remove the reorder.
if (IgnoreReorder && !VectorizableTree.front()->ReorderIndices.empty() &&
VectorizableTree.front()->ReuseShuffleIndices.empty())
VectorizableTree.front()->ReorderIndices.clear();
}
void BoUpSLP::buildExternalUses(
const ExtraValueToDebugLocsMap &ExternallyUsedValues) {
DenseMap<Value *, unsigned> ScalarToExtUses;
// Collect the values that we need to extract from the tree.
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
if (Entry->isGather())
continue;
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
if (!isa<Instruction>(Scalar))
continue;
// If all uses were already replaced, there is no need to do it again.
auto It = ScalarToExtUses.find(Scalar);
if (It != ScalarToExtUses.end() && !ExternalUses[It->second].User)
continue;
// Check if the scalar is externally used as an extra arg.
const auto *ExtI = ExternallyUsedValues.find(Scalar);
if (ExtI != ExternallyUsedValues.end()) {
int FoundLane = Entry->findLaneForValue(Scalar);
LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane "
<< FoundLane << " from " << *Scalar << ".\n");
ScalarToExtUses.try_emplace(Scalar, ExternalUses.size());
ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
continue;
}
for (User *U : Scalar->users()) {
LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
Instruction *UserInst = dyn_cast<Instruction>(U);
if (!UserInst || isDeleted(UserInst))
continue;
// Ignore users in the user ignore list.
if (UserIgnoreList && UserIgnoreList->contains(UserInst))
continue;
// Skip in-tree scalars that become vectors.
if (TreeEntry *UseEntry = getTreeEntry(U)) {
// Some in-tree scalars will remain as scalars in vectorized
// instructions. If that is the case, the one in FoundLane will
// be used.
if (UseEntry->State == TreeEntry::ScatterVectorize ||
!doesInTreeUserNeedToExtract(
Scalar, cast<Instruction>(UseEntry->Scalars.front()), TLI)) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
assert(!UseEntry->isGather() && "Bad state");
continue;
}
U = nullptr;
if (It != ScalarToExtUses.end()) {
ExternalUses[It->second].User = nullptr;
break;
}
}
if (U && Scalar->hasNUsesOrMore(UsesLimit))
U = nullptr;
int FoundLane = Entry->findLaneForValue(Scalar);
LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst
<< " from lane " << FoundLane << " from " << *Scalar
<< ".\n");
It = ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()).first;
ExternalUses.emplace_back(Scalar, U, FoundLane);
if (!U)
break;
}
}
}
}
DenseMap<Value *, SmallVector<StoreInst *>>
BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const {
DenseMap<Value *, SmallVector<StoreInst *>> PtrToStoresMap;
for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size())) {
Value *V = TE->Scalars[Lane];
// To save compilation time, we don't visit values that have too many users.
if (V->hasNUsesOrMore(UsesLimit))
break;
// Collect stores per pointer object.
for (User *U : V->users()) {
auto *SI = dyn_cast<StoreInst>(U);
if (SI == nullptr || !SI->isSimple() ||
!isValidElementType(SI->getValueOperand()->getType()))
continue;
// Skip the store if it is already part of the tree.
if (getTreeEntry(U))
continue;
Value *Ptr = getUnderlyingObject(SI->getPointerOperand());
auto &StoresVec = PtrToStoresMap[Ptr];
// For now just keep one store per pointer object per lane.
// TODO: Extend this to support multiple stores per pointer per lane
if (StoresVec.size() > Lane)
continue;
// Skip if in different BBs.
if (!StoresVec.empty() &&
SI->getParent() != StoresVec.back()->getParent())
continue;
// Make sure that the stores are of the same type.
if (!StoresVec.empty() &&
SI->getValueOperand()->getType() !=
StoresVec.back()->getValueOperand()->getType())
continue;
StoresVec.push_back(SI);
}
}
return PtrToStoresMap;
}
bool BoUpSLP::canFormVector(ArrayRef<StoreInst *> StoresVec,
OrdersType &ReorderIndices) const {
// We check whether the stores in StoresVec can form a vector by sorting them
// and checking whether they are consecutive.
// To avoid calling getPointersDiff() while sorting we create a vector of
// pairs {store, offset from first} and sort this instead.
SmallVector<std::pair<StoreInst *, int>> StoreOffsetVec(StoresVec.size());
StoreInst *S0 = StoresVec[0];
StoreOffsetVec[0] = {S0, 0};
Type *S0Ty = S0->getValueOperand()->getType();
Value *S0Ptr = S0->getPointerOperand();
for (unsigned Idx : seq<unsigned>(1, StoresVec.size())) {
StoreInst *SI = StoresVec[Idx];
std::optional<int> Diff =
getPointersDiff(S0Ty, S0Ptr, SI->getValueOperand()->getType(),
SI->getPointerOperand(), *DL, *SE,
/*StrictCheck=*/true);
// We failed to compare the pointers so just abandon this StoresVec.
if (!Diff)
return false;
StoreOffsetVec[Idx] = {StoresVec[Idx], *Diff};
}
// Sort the vector based on the pointers. We create a copy because we may
// need the original later for calculating the reorder (shuffle) indices.
stable_sort(StoreOffsetVec, [](const std::pair<StoreInst *, int> &Pair1,
const std::pair<StoreInst *, int> &Pair2) {
int Offset1 = Pair1.second;
int Offset2 = Pair2.second;
return Offset1 < Offset2;
});
// Check if the stores are consecutive by checking if their difference is 1.
for (unsigned Idx : seq<unsigned>(1, StoreOffsetVec.size()))
if (StoreOffsetVec[Idx].second != StoreOffsetVec[Idx - 1].second + 1)
return false;
// Calculate the shuffle indices according to their offset against the sorted
// StoreOffsetVec.
ReorderIndices.reserve(StoresVec.size());
for (StoreInst *SI : StoresVec) {
unsigned Idx = find_if(StoreOffsetVec,
[SI](const std::pair<StoreInst *, int> &Pair) {
return Pair.first == SI;
}) -
StoreOffsetVec.begin();
ReorderIndices.push_back(Idx);
}
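// Illustrative example (hypothetical values): for stores at offsets
// {0, 2, 1, 3} relative to S0, the sorted order is {S0, S2, S1, S3} and
// the loop above produces ReorderIndices = {0, 2, 1, 3}.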
// Identity order (e.g., {0,1,2,3}) is modeled as an empty OrdersType in
// reorderTopToBottom() and reorderBottomToTop(), so we are following the
// same convention here.
auto IsIdentityOrder = [](const OrdersType &Order) {
for (unsigned Idx : seq<unsigned>(0, Order.size()))
if (Idx != Order[Idx])
return false;
return true;
};
if (IsIdentityOrder(ReorderIndices))
ReorderIndices.clear();
return true;
}
#ifndef NDEBUG
LLVM_DUMP_METHOD static void dumpOrder(const BoUpSLP::OrdersType &Order) {
for (unsigned Idx : Order)
dbgs() << Idx << ", ";
dbgs() << "\n";
}
#endif
SmallVector<BoUpSLP::OrdersType, 1>
BoUpSLP::findExternalStoreUsersReorderIndices(TreeEntry *TE) const {
unsigned NumLanes = TE->Scalars.size();
DenseMap<Value *, SmallVector<StoreInst *>> PtrToStoresMap =
collectUserStores(TE);
// Holds the reorder indices for each candidate store vector that is a user of
// the current TreeEntry.
SmallVector<OrdersType, 1> ExternalReorderIndices;
// Now inspect the stores collected per pointer and look for vectorization
// candidates. For each candidate, calculate the reorder index vector and
// push it into `ExternalReorderIndices`.
for (const auto &Pair : PtrToStoresMap) {
auto &StoresVec = Pair.second;
// If we have fewer than NumLanes stores, then we can't form a vector.
if (StoresVec.size() != NumLanes)
continue;
// If the stores are not consecutive then abandon this StoresVec.
OrdersType ReorderIndices;
if (!canFormVector(StoresVec, ReorderIndices))
continue;
// We now know that the scalars in StoresVec can form a vector instruction,
// so set the reorder indices.
ExternalReorderIndices.push_back(ReorderIndices);
}
return ExternalReorderIndices;
}
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
const SmallDenseSet<Value *> &UserIgnoreLst) {
deleteTree();
UserIgnoreList = &UserIgnoreLst;
if (!allSameType(Roots))
return;
buildTree_rec(Roots, 0, EdgeInfo());
}
void BoUpSLP::buildTree(ArrayRef<Value *> Roots) {
deleteTree();
if (!allSameType(Roots))
return;
buildTree_rec(Roots, 0, EdgeInfo());
}
/// \return true if the specified list of values has only one instruction that
/// requires scheduling, false otherwise.
#ifndef NDEBUG
static bool needToScheduleSingleInstruction(ArrayRef<Value *> VL) {
Value *NeedsScheduling = nullptr;
for (Value *V : VL) {
if (doesNotNeedToBeScheduled(V))
continue;
if (!NeedsScheduling) {
NeedsScheduling = V;
continue;
}
return false;
}
return NeedsScheduling;
}
#endif
/// Generates a key/subkey pair for the given value to provide effective
/// sorting of the values and better detection of vectorizable value
/// sequences. The keys are used to sort the values themselves, and the
/// subkeys to sort within value subgroups.
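/// For example (illustrative): two simple loads of the same type receive the
/// same key and land in the same sorting bucket, while a non-simple (e.g.
/// volatile) load is keyed by the instruction itself and thus kept apart.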
static std::pair<size_t, size_t> generateKeySubkey(
Value *V, const TargetLibraryInfo *TLI,
function_ref<hash_code(size_t, LoadInst *)> LoadsSubkeyGenerator,
bool AllowAlternate) {
hash_code Key = hash_value(V->getValueID() + 2);
hash_code SubKey = hash_value(0);
// Sort the loads by the distance between the pointers.
if (auto *LI = dyn_cast<LoadInst>(V)) {
Key = hash_combine(LI->getType(), hash_value(Instruction::Load), Key);
if (LI->isSimple())
SubKey = hash_value(LoadsSubkeyGenerator(Key, LI));
else
Key = SubKey = hash_value(LI);
} else if (isVectorLikeInstWithConstOps(V)) {
// Sort extracts by the vector operands.
if (isa<ExtractElementInst, UndefValue>(V))
Key = hash_value(Value::UndefValueVal + 1);
if (auto *EI = dyn_cast<ExtractElementInst>(V)) {
if (!isUndefVector(EI->getVectorOperand()).all() &&
!isa<UndefValue>(EI->getIndexOperand()))
SubKey = hash_value(EI->getVectorOperand());
}
} else if (auto *I = dyn_cast<Instruction>(V)) {
// Sort other instructions just by the opcodes except for CMPInst.
// For CMP also sort by the predicate kind.
if ((isa<BinaryOperator, CastInst>(I)) &&
isValidForAlternation(I->getOpcode())) {
if (AllowAlternate)
Key = hash_value(isa<BinaryOperator>(I) ? 1 : 0);
else
Key = hash_combine(hash_value(I->getOpcode()), Key);
SubKey = hash_combine(
hash_value(I->getOpcode()), hash_value(I->getType()),
hash_value(isa<BinaryOperator>(I)
? I->getType()
: cast<CastInst>(I)->getOperand(0)->getType()));
// For casts, look through the only operand to improve compile time.
if (isa<CastInst>(I)) {
std::pair<size_t, size_t> OpVals =
generateKeySubkey(I->getOperand(0), TLI, LoadsSubkeyGenerator,
/*AllowAlternate=*/true);
Key = hash_combine(OpVals.first, Key);
SubKey = hash_combine(OpVals.first, SubKey);
}
} else if (auto *CI = dyn_cast<CmpInst>(I)) {
CmpInst::Predicate Pred = CI->getPredicate();
if (CI->isCommutative())
Pred = std::min(Pred, CmpInst::getInversePredicate(Pred));
CmpInst::Predicate SwapPred = CmpInst::getSwappedPredicate(Pred);
SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Pred),
hash_value(SwapPred),
hash_value(CI->getOperand(0)->getType()));
} else if (auto *Call = dyn_cast<CallInst>(I)) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(Call, TLI);
if (isTriviallyVectorizable(ID)) {
SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(ID));
} else if (!VFDatabase(*Call).getMappings(*Call).empty()) {
SubKey = hash_combine(hash_value(I->getOpcode()),
hash_value(Call->getCalledFunction()));
} else {
Key = hash_combine(hash_value(Call), Key);
SubKey = hash_combine(hash_value(I->getOpcode()), hash_value(Call));
}
for (const CallBase::BundleOpInfo &Op : Call->bundle_op_infos())
SubKey = hash_combine(hash_value(Op.Begin), hash_value(Op.End),
hash_value(Op.Tag), SubKey);
} else if (auto *Gep = dyn_cast<GetElementPtrInst>(I)) {
if (Gep->getNumOperands() == 2 && isa<ConstantInt>(Gep->getOperand(1)))
SubKey = hash_value(Gep->getPointerOperand());
else
SubKey = hash_value(Gep);
} else if (BinaryOperator::isIntDivRem(I->getOpcode()) &&
!isa<ConstantInt>(I->getOperand(1))) {
// Do not try to vectorize instructions with potentially high cost.
SubKey = hash_value(I);
} else {
SubKey = hash_value(I->getOpcode());
}
Key = hash_combine(hash_value(I->getParent()), Key);
}
return std::make_pair(Key, SubKey);
}
/// Checks if the specified instruction \p I is an alternate operation for
/// the given \p MainOp and \p AltOp instructions.
static bool isAlternateInstruction(const Instruction *I,
const Instruction *MainOp,
const Instruction *AltOp,
const TargetLibraryInfo &TLI);
bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
ArrayRef<Value *> VL) const {
unsigned Opcode0 = S.getOpcode();
unsigned Opcode1 = S.getAltOpcode();
SmallBitVector OpcodeMask(getAltInstrMask(VL, Opcode0, Opcode1));
// If this pattern is supported by the target then consider it profitable.
if (TTI->isLegalAltInstr(getWidenedType(S.MainOp->getType(), VL.size()),
Opcode0, Opcode1, OpcodeMask))
return true;
SmallVector<ValueList> Operands;
for (unsigned I : seq<unsigned>(0, S.MainOp->getNumOperands())) {
Operands.emplace_back();
// Prepare the operand vector.
for (Value *V : VL)
Operands.back().push_back(cast<Instruction>(V)->getOperand(I));
}
if (Operands.size() == 2) {
// Try to find the best operand candidates.
for (unsigned I : seq<unsigned>(0, VL.size() - 1)) {
SmallVector<std::pair<Value *, Value *>> Candidates(3);
Candidates[0] = std::make_pair(Operands[0][I], Operands[0][I + 1]);
Candidates[1] = std::make_pair(Operands[0][I], Operands[1][I + 1]);
Candidates[2] = std::make_pair(Operands[1][I], Operands[0][I + 1]);
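// Illustrative reading: candidate 0 keeps both lanes as-is, while
// candidates 1 and 2 test whether swapping the operands of lane I + 1 or
// lane I between the two operand vectors yields a better-scoring pair.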
std::optional<int> Res = findBestRootPair(Candidates);
switch (Res.value_or(0)) {
case 0:
break;
case 1:
std::swap(Operands[0][I + 1], Operands[1][I + 1]);
break;
case 2:
std::swap(Operands[0][I], Operands[1][I]);
break;
default:
llvm_unreachable("Unexpected index.");
}
}
}
DenseSet<unsigned> UniqueOpcodes;
constexpr unsigned NumAltInsts = 3; // main + alt + shuffle.
unsigned NonInstCnt = 0;
// Estimate the number of instructions required for the vectorized node and
// for the buildvector node.
unsigned UndefCnt = 0;
// Count the number of extra shuffles, required for vector nodes.
unsigned ExtraShuffleInsts = 0;
// Check that the operands do not contain the same values, and form either a
// perfect diamond match or a shuffled match.
if (Operands.size() == 2) {
// Do not count same operands twice.
if (Operands.front() == Operands.back()) {
Operands.erase(Operands.begin());
} else if (!allConstant(Operands.front()) &&
all_of(Operands.front(), [&](Value *V) {
return is_contained(Operands.back(), V);
})) {
Operands.erase(Operands.begin());
++ExtraShuffleInsts;
}
}
const Loop *L = LI->getLoopFor(S.MainOp->getParent());
// Vectorize the node if:
// 1. At least a single operand is constant or splat.
// 2. The operands have many loop invariants (while the instructions
// themselves are not loop invariant).
// 3. At least a single unique operand is expected to be vectorized.
return none_of(Operands,
[&](ArrayRef<Value *> Op) {
if (allConstant(Op) ||
(!isSplat(Op) && allSameBlock(Op) && allSameType(Op) &&
getSameOpcode(Op, *TLI).MainOp))
return false;
DenseMap<Value *, unsigned> Uniques;
for (Value *V : Op) {
if (isa<Constant, ExtractElementInst>(V) ||
getTreeEntry(V) || (L && L->isLoopInvariant(V))) {
if (isa<UndefValue>(V))
++UndefCnt;
continue;
}
auto Res = Uniques.try_emplace(V, 0);
// Found first duplicate - need to add shuffle.
if (!Res.second && Res.first->second == 1)
++ExtraShuffleInsts;
++Res.first->getSecond();
if (auto *I = dyn_cast<Instruction>(V))
UniqueOpcodes.insert(I->getOpcode());
else if (Res.second)
++NonInstCnt;
}
return none_of(Uniques, [&](const auto &P) {
return P.first->hasNUsesOrMore(P.second + 1) &&
none_of(P.first->users(), [&](User *U) {
return getTreeEntry(U) || Uniques.contains(U);
});
});
}) ||
// Do not vectorize the node if the estimated number of vector instructions
// exceeds the estimated number of buildvector instructions. The number of
// vector operands is the number of vector instructions plus the number of
// vector instructions for the operands (buildvectors). The number of
// buildvector instructions is simply number_of_operands * number_of_scalars.
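// E.g. (hypothetical numbers): for a 2-operand alternate node over 4
// scalars the buildvector estimate is 2 * 4 = 8, so the node is vectorized
// only if UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts +
// NumAltInsts stays below 8 (and enough non-undef scalars remain).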
(UndefCnt < (VL.size() - 1) * S.MainOp->getNumOperands() &&
(UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts +
NumAltInsts) < S.MainOp->getNumOperands() * VL.size());
}
BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const {
assert(S.MainOp && "Expected instructions with same/alternate opcodes only.");
unsigned ShuffleOrOp =
S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode();
auto *VL0 = cast<Instruction>(S.OpValue);
switch (ShuffleOrOp) {
case Instruction::PHI: {
// Too many operands - gather, most probably won't be vectorized.
if (VL0->getNumOperands() > MaxPHINumOperands)
return TreeEntry::NeedToGather;
// Check for terminator values (e.g. invoke).
for (Value *V : VL)
for (Value *Incoming : cast<PHINode>(V)->incoming_values()) {
Instruction *Term = dyn_cast<Instruction>(Incoming);
if (Term && Term->isTerminator()) {
LLVM_DEBUG(dbgs()
<< "SLP: Need to swizzle PHINodes (terminator use).\n");
return TreeEntry::NeedToGather;
}
}
return TreeEntry::Vectorize;
}
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
// FIXME: Vectorizing is not supported yet for non-power-of-2 ops.
if (!isPowerOf2_32(VL.size()))
return TreeEntry::NeedToGather;
if (Reuse || !CurrentOrder.empty())
return TreeEntry::Vectorize;
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
return TreeEntry::NeedToGather;
}
case Instruction::InsertElement: {
// Check that we have a buildvector and not a shuffle of 2 or more
// different vectors.
ValueSet SourceVectors;
for (Value *V : VL) {
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
assert(getElementIndex(V) != std::nullopt &&
"Non-constant or undef index?");
}
if (count_if(VL, [&SourceVectors](Value *V) {
return !SourceVectors.contains(V);
}) >= 2) {
// Found 2nd source vector - cancel.
LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with "
"different source vectors.\n");
return TreeEntry::NeedToGather;
}
return TreeEntry::Vectorize;
}
case Instruction::Load: {
// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.
switch (canVectorizeLoads(VL, VL0, CurrentOrder, PointerOps)) {
case LoadsState::Vectorize:
return TreeEntry::Vectorize;
case LoadsState::ScatterVectorize:
return TreeEntry::ScatterVectorize;
case LoadsState::StridedVectorize:
return TreeEntry::StridedVectorize;
case LoadsState::Gather:
#ifndef NDEBUG
Type *ScalarTy = VL0->getType();
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy))
LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
else if (any_of(VL,
[](Value *V) { return !cast<LoadInst>(V)->isSimple(); }))
LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
else
LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
#endif // NDEBUG
return TreeEntry::NeedToGather;
}
llvm_unreachable("Unexpected state of loads");
}
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
Type *SrcTy = VL0->getOperand(0)->getType();
for (Value *V : VL) {
Type *Ty = cast<Instruction>(V)->getOperand(0)->getType();
if (Ty != SrcTy || !isValidElementType(Ty)) {
LLVM_DEBUG(
dbgs() << "SLP: Gathering casts with different src types.\n");
return TreeEntry::NeedToGather;
}
}
return TreeEntry::Vectorize;
}
case Instruction::ICmp:
case Instruction::FCmp: {
// Check that all of the compares have the same predicate.
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
Type *ComparedTy = VL0->getOperand(0)->getType();
for (Value *V : VL) {
CmpInst *Cmp = cast<CmpInst>(V);
if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
Cmp->getOperand(0)->getType() != ComparedTy) {
LLVM_DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
return TreeEntry::NeedToGather;
}
}
return TreeEntry::Vectorize;
}
case Instruction::Select:
case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
return TreeEntry::Vectorize;
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
for (Value *V : VL) {
auto *I = dyn_cast<GetElementPtrInst>(V);
if (!I)
continue;
if (I->getNumOperands() != 2) {
LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
return TreeEntry::NeedToGather;
}
}
// We can't combine several GEPs into one vector if they operate on
// different types.
Type *Ty0 = cast<GEPOperator>(VL0)->getSourceElementType();
for (Value *V : VL) {
auto *GEP = dyn_cast<GEPOperator>(V);
if (!GEP)
continue;
Type *CurTy = GEP->getSourceElementType();
if (Ty0 != CurTy) {
LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
return TreeEntry::NeedToGather;
}
}
// We don't combine GEPs with non-constant indexes.
Type *Ty1 = VL0->getOperand(1)->getType();
for (Value *V : VL) {
auto *I = dyn_cast<GetElementPtrInst>(V);
if (!I)
continue;
auto *Op = I->getOperand(1);
if ((!IsScatterVectorizeUserTE && !isa<ConstantInt>(Op)) ||
(Op->getType() != Ty1 &&
((IsScatterVectorizeUserTE && !isa<ConstantInt>(Op)) ||
Op->getType()->getScalarSizeInBits() >
DL->getIndexSizeInBits(
V->getType()->getPointerAddressSpace())))) {
LLVM_DEBUG(
dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
return TreeEntry::NeedToGather;
}
}
return TreeEntry::Vectorize;
}
case Instruction::Store: {
// Check if the stores are consecutive or if we need to swizzle them.
llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();
// Avoid types that are padded when being allocated as scalars, while
// being packed together in a vector (such as i1).
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering stores of non-packed type.\n");
return TreeEntry::NeedToGather;
}
// Make sure all stores in the bundle are simple - we can't vectorize
// atomic or volatile stores.
for (Value *V : VL) {
auto *SI = cast<StoreInst>(V);
if (!SI->isSimple()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple stores.\n");
return TreeEntry::NeedToGather;
}
PointerOps.push_back(SI->getPointerOperand());
}
// Check the order of pointer operands.
if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
Value *Ptr0;
Value *PtrN;
if (CurrentOrder.empty()) {
Ptr0 = PointerOps.front();
PtrN = PointerOps.back();
} else {
Ptr0 = PointerOps[CurrentOrder.front()];
PtrN = PointerOps[CurrentOrder.back()];
}
std::optional<int> Dist =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
// Check that the sorted pointer operands are consecutive.
if (static_cast<unsigned>(*Dist) == VL.size() - 1)
return TreeEntry::Vectorize;
}
LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return TreeEntry::NeedToGather;
}
case Instruction::Call: {
// Check if the calls are all to the same vectorizable intrinsic or
// library function.
CallInst *CI = cast<CallInst>(VL0);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
VFShape Shape = VFShape::get(
CI->getFunctionType(),
ElementCount::getFixed(static_cast<unsigned int>(VL.size())),
false /*HasGlobalPred*/);
Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
if (!VecFunc && !isTriviallyVectorizable(ID)) {
LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return TreeEntry::NeedToGather;
}
Function *F = CI->getCalledFunction();
unsigned NumArgs = CI->arg_size();
SmallVector<Value *, 4> ScalarArgs(NumArgs, nullptr);
for (unsigned J = 0; J != NumArgs; ++J)
if (isVectorIntrinsicWithScalarOpAtArg(ID, J))
ScalarArgs[J] = CI->getArgOperand(J);
for (Value *V : VL) {
CallInst *CI2 = dyn_cast<CallInst>(V);
if (!CI2 || CI2->getCalledFunction() != F ||
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
(VecFunc &&
VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) ||
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *V
<< "\n");
return TreeEntry::NeedToGather;
}
// Some intrinsics have scalar arguments, which must be the same across the
// calls for them to be vectorized.
for (unsigned J = 0; J != NumArgs; ++J) {
if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) {
Value *A1J = CI2->getArgOperand(J);
if (ScalarArgs[J] != A1J) {
LLVM_DEBUG(dbgs()
<< "SLP: mismatched arguments in call:" << *CI
<< " argument " << ScalarArgs[J] << "!=" << A1J << "\n");
return TreeEntry::NeedToGather;
}
}
}
// Verify that the bundle operands are identical between the two calls.
if (CI->hasOperandBundles() &&
!std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(),
CI->op_begin() + CI->getBundleOperandsEndIndex(),
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI
<< "!=" << *V << '\n');
return TreeEntry::NeedToGather;
}
}
return TreeEntry::Vectorize;
}
case Instruction::ShuffleVector: {
// If this is not an alternate sequence of opcodes like add-sub,
// then do not vectorize this instruction.
if (!S.isAltShuffle()) {
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return TreeEntry::NeedToGather;
}
if (!SLPSkipEarlyProfitabilityCheck && !areAltOperandsProfitable(S, VL)) {
LLVM_DEBUG(
dbgs()
<< "SLP: ShuffleVector not vectorized, operands are buildvector and "
"the whole alt sequence is not profitable.\n");
return TreeEntry::NeedToGather;
}
return TreeEntry::Vectorize;
}
default:
LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
return TreeEntry::NeedToGather;
}
}
namespace {
/// Allows correct handling of phi-node operands based on the \p Main
/// PHINode's order of incoming basic blocks/values.
class PHIHandler {
DominatorTree &DT;
PHINode *Main = nullptr;
SmallVector<Value *> Phis;
SmallVector<SmallVector<Value *>> Operands;
public:
PHIHandler() = delete;
PHIHandler(DominatorTree &DT, PHINode *Main, ArrayRef<Value *> Phis)
: DT(DT), Main(Main), Phis(Phis),
Operands(Main->getNumIncomingValues(),
SmallVector<Value *>(Phis.size(), nullptr)) {}
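/// Fills Operands so that Operands[I][Lane] holds the value that the
/// Lane-th phi receives from \p Main's I-th incoming block (summary of the
/// logic below; blocks unreachable from the entry are filled with poison).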
void buildOperands() {
constexpr unsigned FastLimit = 4;
if (Main->getNumIncomingValues() <= FastLimit) {
for (unsigned I : seq<unsigned>(0, Main->getNumIncomingValues())) {
BasicBlock *InBB = Main->getIncomingBlock(I);
if (!DT.isReachableFromEntry(InBB)) {
Operands[I].assign(Phis.size(), PoisonValue::get(Main->getType()));
continue;
}
// Prepare the operand vector.
for (auto [Idx, V] : enumerate(Phis)) {
auto *P = cast<PHINode>(V);
if (P->getIncomingBlock(I) == InBB)
Operands[I][Idx] = P->getIncomingValue(I);
else
Operands[I][Idx] = P->getIncomingValueForBlock(InBB);
}
}
return;
}
SmallDenseMap<BasicBlock *, SmallVector<unsigned>, 4> Blocks;
for (unsigned I : seq<unsigned>(0, Main->getNumIncomingValues())) {
BasicBlock *InBB = Main->getIncomingBlock(I);
if (!DT.isReachableFromEntry(InBB)) {
Operands[I].assign(Phis.size(), PoisonValue::get(Main->getType()));
continue;
}
Blocks.try_emplace(InBB).first->second.push_back(I);
}
for (auto [Idx, V] : enumerate(Phis)) {
auto *P = cast<PHINode>(V);
for (unsigned I : seq<unsigned>(0, P->getNumIncomingValues())) {
BasicBlock *InBB = P->getIncomingBlock(I);
if (InBB == Main->getIncomingBlock(I)) {
if (isa_and_nonnull<PoisonValue>(Operands[I][Idx]))
continue;
Operands[I][Idx] = P->getIncomingValue(I);
continue;
}
auto It = Blocks.find(InBB);
if (It == Blocks.end())
continue;
Operands[It->second.front()][Idx] = P->getIncomingValue(I);
}
}
for (const auto &P : Blocks) {
if (P.getSecond().size() <= 1)
continue;
unsigned BasicI = P.getSecond().front();
for (unsigned I : ArrayRef(P.getSecond()).drop_front()) {
assert(all_of(enumerate(Operands[I]),
[&](const auto &Data) {
return !Data.value() ||
Data.value() == Operands[BasicI][Data.index()];
}) &&
"Expected empty operands list.");
Operands[I] = Operands[BasicI];
}
}
}
ArrayRef<Value *> getOperands(unsigned I) const { return Operands[I]; }
};
} // namespace
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
const EdgeInfo &UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
SmallVector<int> ReuseShuffleIndices;
SmallVector<Value *> UniqueValues;
SmallVector<Value *> NonUniqueValueVL;
auto TryToFindDuplicates = [&](const InstructionsState &S,
bool DoNotFail = false) {
// Check that every instruction appears once in this bundle.
DenseMap<Value *, unsigned> UniquePositions(VL.size());
for (Value *V : VL) {
if (isConstant(V)) {
ReuseShuffleIndices.emplace_back(
isa<UndefValue>(V) ? PoisonMaskElem : UniqueValues.size());
UniqueValues.emplace_back(V);
continue;
}
auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
ReuseShuffleIndices.emplace_back(Res.first->second);
if (Res.second)
UniqueValues.emplace_back(V);
}
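// Illustrative example (hypothetical values): for VL = {a, b, a, c} the
// loop above yields UniqueValues = {a, b, c} and ReuseShuffleIndices =
// {0, 1, 0, 2}; undef constants are recorded as PoisonMaskElem instead of
// a reuse slot.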
size_t NumUniqueScalarValues = UniqueValues.size();
if (NumUniqueScalarValues == VL.size()) {
ReuseShuffleIndices.clear();
} else {
// FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) {
LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
"for nodes with padding.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return false;
}
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
if (NumUniqueScalarValues <= 1 ||
(UniquePositions.size() == 1 && all_of(UniqueValues,
[](Value *V) {
return isa<UndefValue>(V) ||
!isConstant(V);
})) ||
!llvm::has_single_bit<uint32_t>(NumUniqueScalarValues)) {
if (DoNotFail && UniquePositions.size() > 1 &&
NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
all_of(UniqueValues, [=](Value *V) {
return isa<ExtractElementInst>(V) ||
areAllUsersVectorized(cast<Instruction>(V),
UserIgnoreList);
})) {
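// Illustrative example (hypothetical values): 3 unique scalars get padded
// with a repeat of the last one up to PowerOf2Ceil(3) = 4 lanes, keeping a
// power-of-2 vector factor for the node.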
unsigned PWSz = PowerOf2Ceil(UniqueValues.size());
if (PWSz == VL.size()) {
ReuseShuffleIndices.clear();
} else {
NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
NonUniqueValueVL.append(PWSz - UniqueValues.size(),
UniqueValues.back());
VL = NonUniqueValueVL;
}
return true;
}
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return false;
}
VL = UniqueValues;
}
return true;
};
InstructionsState S = getSameOpcode(VL, *TLI);
// Don't vectorize ephemeral values.
if (!EphValues.empty()) {
for (Value *V : VL) {
if (EphValues.count(V)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is ephemeral.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
}
}
}
// Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of
// a load), in which case peek through to include it in the tree, without
// ballooning over-budget.
if (Depth >= RecursionMaxDepth &&
!(S.MainOp && isa<Instruction>(S.MainOp) && S.MainOp == S.AltOp &&
VL.size() >= 4 &&
(match(S.MainOp, m_Load(m_Value())) || all_of(VL, [&S](const Value *I) {
return match(I,
m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
cast<Instruction>(I)->getOpcode() ==
cast<Instruction>(S.MainOp)->getOpcode();
})))) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
return;
}
// Don't handle scalable vectors
if (S.getOpcode() == Instruction::ExtractElement &&
isa<ScalableVectorType>(
cast<ExtractElementInst>(S.OpValue)->getVectorOperandType())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
return;
}
// Don't handle vectors.
if (!SLPReVec && S.OpValue->getType()->isVectorTy() &&
!isa<InsertElementInst>(S.OpValue)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
if (!SLPReVec && SI->getValueOperand()->getType()->isVectorTy()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
}
// If all of the operands are identical or constant we have a simple solution.
// If we are dealing with insert/extract instructions, they must all have
// constant indices; otherwise we should gather them, not try to vectorize.
// If this is an alternate-op node with 2 elements and gathered operands, do
// not vectorize.
auto &&NotProfitableForVectorization = [&S, this,
Depth](ArrayRef<Value *> VL) {
if (!S.getOpcode() || !S.isAltShuffle() || VL.size() > 2)
return false;
if (VectorizableTree.size() < MinTreeSize)
return false;
if (Depth >= RecursionMaxDepth - 1)
return true;
// Check if all operands are extracts, part of vector node or can build a
// regular vectorize node.
SmallVector<unsigned, 2> InstsCount;
for (Value *V : VL) {
auto *I = cast<Instruction>(V);
InstsCount.push_back(count_if(I->operand_values(), [](Value *Op) {
return isa<Instruction>(Op) || isVectorLikeInstWithConstOps(Op);
}));
}
bool IsCommutative = isCommutative(S.MainOp) || isCommutative(S.AltOp);
if ((IsCommutative &&
std::accumulate(InstsCount.begin(), InstsCount.end(), 0) < 2) ||
(!IsCommutative &&
all_of(InstsCount, [](unsigned ICnt) { return ICnt < 2; })))
return true;
assert(VL.size() == 2 && "Expected only 2 alternate op instructions.");
SmallVector<SmallVector<std::pair<Value *, Value *>>> Candidates;
auto *I1 = cast<Instruction>(VL.front());
auto *I2 = cast<Instruction>(VL.back());
for (int Op = 0, E = S.MainOp->getNumOperands(); Op < E; ++Op)
Candidates.emplace_back().emplace_back(I1->getOperand(Op),
I2->getOperand(Op));
if (static_cast<unsigned>(count_if(
Candidates, [this](ArrayRef<std::pair<Value *, Value *>> Cand) {
return findBestRootPair(Cand, LookAheadHeuristics::ScoreSplat);
})) >= S.MainOp->getNumOperands() / 2)
return false;
if (S.MainOp->getNumOperands() > 2)
return true;
if (IsCommutative) {
// Check permuted operands.
Candidates.clear();
for (int Op = 0, E = S.MainOp->getNumOperands(); Op < E; ++Op)
Candidates.emplace_back().emplace_back(I1->getOperand(Op),
I2->getOperand((Op + 1) % E));
if (any_of(
Candidates, [this](ArrayRef<std::pair<Value *, Value *>> Cand) {
return findBestRootPair(Cand, LookAheadHeuristics::ScoreSplat);
}))
return false;
}
return true;
};
SmallVector<unsigned> SortedIndices;
BasicBlock *BB = nullptr;
bool IsScatterVectorizeUserTE =
UserTreeIdx.UserTE &&
UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize;
bool AreAllSameBlock = S.getOpcode() && allSameBlock(VL);
bool AreScatterAllGEPSameBlock =
(IsScatterVectorizeUserTE && S.OpValue->getType()->isPointerTy() &&
VL.size() > 2 &&
all_of(VL,
[&BB](Value *V) {
auto *I = dyn_cast<GetElementPtrInst>(V);
if (!I)
return doesNotNeedToBeScheduled(V);
if (!BB)
BB = I->getParent();
return BB == I->getParent() && I->getNumOperands() == 2;
}) &&
BB &&
sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE,
SortedIndices));
bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock;
if (!AreAllSameInsts || allConstant(VL) || isSplat(VL) ||
(isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(
S.OpValue) &&
!all_of(VL, isVectorLikeInstWithConstOps)) ||
NotProfitableForVectorization(VL)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
return;
}
// We now know that this is a vector of instructions of the same type from
// the same block.
// Check if this is a duplicate of another entry.
if (TreeEntry *E = getTreeEntry(S.OpValue)) {
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
if (!E->isSame(VL)) {
auto It = MultiNodeScalars.find(S.OpValue);
if (It != MultiNodeScalars.end()) {
auto *TEIt = find_if(It->getSecond(),
[&](TreeEntry *ME) { return ME->isSame(VL); });
if (TEIt != It->getSecond().end())
E = *TEIt;
else
E = nullptr;
} else {
E = nullptr;
}
}
if (!E) {
if (!doesNotNeedToBeScheduled(S.OpValue)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
return;
}
} else {
// Record the reuse of the tree node. FIXME: currently this is only used
// to properly draw the graph rather than for the actual vectorization.
E->UserTreeIndices.push_back(UserTreeIdx);
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
<< ".\n");
return;
}
}
// Check that none of the instructions in the bundle are already in the tree.
for (Value *V : VL) {
if ((!IsScatterVectorizeUserTE && !isa<Instruction>(V)) ||
doesNotNeedToBeScheduled(V))
continue;
if (getTreeEntry(V)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is already in tree.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
return;
}
}
// The reduction nodes (stored in UserIgnoreList) should also stay scalar.
if (UserIgnoreList && !UserIgnoreList->empty()) {
for (Value *V : VL) {
if (UserIgnoreList && UserIgnoreList->contains(V)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
return;
}
}
}
// Special processing for sorted pointers for ScatterVectorize nodes with
// constant indices only.
if (!AreAllSameBlock && AreScatterAllGEPSameBlock) {
assert(S.OpValue->getType()->isPointerTy() &&
count_if(VL, IsaPred<GetElementPtrInst>) >= 2 &&
"Expected pointers only.");
// Reset S to make it a GetElementPtr kind of node.
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
assert(It != VL.end() && "Expected at least one GEP.");
S = getSameOpcode(*It, *TLI);
}
// Check that all of the users of the scalars that we want to vectorize are
// schedulable.
auto *VL0 = cast<Instruction>(S.OpValue);
BB = VL0->getParent();
if (!DT->isReachableFromEntry(BB)) {
// Don't go into unreachable blocks. They may contain instructions with
// dependency cycles which confuse the final scheduling.
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
}
// Don't go into catchswitch blocks, which can happen with PHIs.
// Such blocks can only have PHIs and the catchswitch. There is no
// place to insert a shuffle if we need to, so just avoid that issue.
if (isa<CatchSwitchInst>(BB->getTerminator())) {
LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return;
}
// Check that every instruction appears once in this bundle.
if (!TryToFindDuplicates(S, /*DoNotFail=*/true))
return;
// Perform specific checks for each particular instruction kind.
OrdersType CurrentOrder;
SmallVector<Value *> PointerOps;
TreeEntry::EntryState State = getScalarsVectorizationState(
S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
if (State == TreeEntry::NeedToGather) {
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
return;
}
auto &BSRef = BlocksSchedules[BB];
if (!BSRef)
BSRef = std::make_unique<BlockScheduling>(BB);
BlockScheduling &BS = *BSRef;
std::optional<ScheduleData *> Bundle =
BS.tryScheduleBundle(UniqueValues, this, S);
#ifdef EXPENSIVE_CHECKS
// Make sure we didn't break any internal invariants
BS.verify();
#endif
if (!Bundle) {
LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
assert((!BS.getScheduleData(VL0) ||
!BS.getScheduleData(VL0)->isPartOfBundle()) &&
"tryScheduleBundle should cancelScheduling on failure");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
NonScheduledFirst.insert(VL.front());
return;
}
LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
unsigned ShuffleOrOp = S.isAltShuffle() ?
(unsigned) Instruction::ShuffleVector : S.getOpcode();
switch (ShuffleOrOp) {
case Instruction::PHI: {
auto *PH = cast<PHINode>(VL0);
TreeEntry *TE =
newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
// Keeps the reordered operands to avoid code duplication.
PHIHandler Handler(*DT, PH, VL);
Handler.buildOperands();
for (unsigned I : seq<unsigned>(0, PH->getNumOperands()))
TE->setOperand(I, Handler.getOperands(I));
for (unsigned I : seq<unsigned>(0, PH->getNumOperands()))
buildTree_rec(Handler.getOperands(I), Depth + 1, {TE, I});
return;
}
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
if (CurrentOrder.empty()) {
LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
} else {
LLVM_DEBUG({
dbgs() << "SLP: Reusing or shuffling of reordered extract sequence "
"with order";
for (unsigned Idx : CurrentOrder)
dbgs() << " " << Idx;
dbgs() << "\n";
});
fixupOrderingIndices(CurrentOrder);
}
// Insert new order with initial value 0, if it does not exist,
// otherwise return the iterator to the existing one.
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTree_rec() towards the operands.
ValueList Op0;
Op0.assign(VL.size(), VL0->getOperand(0));
VectorizableTree.back()->setOperand(0, Op0);
return;
}
case Instruction::InsertElement: {
assert(ReuseShuffleIndices.empty() && "All inserts should be unique");
auto OrdCompare = [](const std::pair<int, int> &P1,
const std::pair<int, int> &P2) {
return P1.first > P2.first;
};
PriorityQueue<std::pair<int, int>, SmallVector<std::pair<int, int>>,
decltype(OrdCompare)>
Indices(OrdCompare);
for (int I = 0, E = VL.size(); I < E; ++I) {
unsigned Idx = *getElementIndex(VL[I]);
Indices.emplace(Idx, I);
}
OrdersType CurrentOrder(VL.size(), VL.size());
bool IsIdentity = true;
for (int I = 0, E = VL.size(); I < E; ++I) {
CurrentOrder[Indices.top().second] = I;
IsIdentity &= Indices.top().second == I;
Indices.pop();
}
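// Illustrative example (hypothetical values): if the four inserts use
// element indices {2, 0, 3, 1}, the queue pops lanes in increasing index
// order and the loop above yields CurrentOrder = {2, 0, 3, 1} (each lane
// mapped to the rank of its index), a non-identity order that is kept.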
if (IsIdentity)
CurrentOrder.clear();
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
std::nullopt, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
TE->setOperandsInOrder();
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
return;
}
case Instruction::Load: {
// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.
TreeEntry *TE = nullptr;
fixupOrderingIndices(CurrentOrder);
switch (State) {
case TreeEntry::Vectorize:
TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
if (CurrentOrder.empty())
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
else
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
TE->setOperandsInOrder();
break;
case TreeEntry::StridedVectorize:
// Vectorizing non-consecutive loads as strided loads.
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
break;
case TreeEntry::ScatterVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndices);
TE->setOperandsInOrder();
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
break;
case TreeEntry::NeedToGather:
llvm_unreachable("Unexpected loads state.");
}
return;
}
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
auto [PrevMaxBW, PrevMinBW] = CastMaxMinBWSizes.value_or(
std::make_pair(std::numeric_limits<unsigned>::min(),
std::numeric_limits<unsigned>::max()));
if (ShuffleOrOp == Instruction::ZExt ||
ShuffleOrOp == Instruction::SExt) {
CastMaxMinBWSizes = std::make_pair(
std::max<unsigned>(DL->getTypeSizeInBits(VL0->getType()),
PrevMaxBW),
std::min<unsigned>(
DL->getTypeSizeInBits(VL0->getOperand(0)->getType()),
PrevMinBW));
} else if (ShuffleOrOp == Instruction::Trunc) {
CastMaxMinBWSizes = std::make_pair(
std::max<unsigned>(
DL->getTypeSizeInBits(VL0->getOperand(0)->getType()),
PrevMaxBW),
std::min<unsigned>(DL->getTypeSizeInBits(VL0->getType()),
PrevMinBW));
ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
} else if (ShuffleOrOp == Instruction::SIToFP ||
ShuffleOrOp == Instruction::UIToFP) {
unsigned NumSignBits =
ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT);
if (auto *OpI = dyn_cast<Instruction>(VL0->getOperand(0))) {
APInt Mask = DB->getDemandedBits(OpI);
NumSignBits = std::max(NumSignBits, Mask.countl_zero());
}
if (NumSignBits * 2 >=
DL->getTypeSizeInBits(VL0->getOperand(0)->getType()))
ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
}
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
TE->setOperandsInOrder();
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
case Instruction::ICmp:
case Instruction::FCmp: {
// Check that all of the compares have the same predicate.
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
ValueList Left, Right;
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
"Commutative Predicate mismatch");
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
} else {
// Collect operands - commute if it uses the swapped predicate.
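// E.g. (an illustrative sketch): with P0 == 'icmp slt', a lane containing
// 'icmp sgt %a, %b' contributes LHS = %b, RHS = %a, because
// 'sgt %a, %b' is equivalent to 'slt %b, %a'.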
for (Value *V : VL) {
auto *Cmp = cast<CmpInst>(V);
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
if (Cmp->getPredicate() != P0)
std::swap(LHS, RHS);
Left.push_back(LHS);
Right.push_back(RHS);
}
}
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
if (ShuffleOrOp == Instruction::ICmp) {
unsigned NumSignBits0 =
ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT);
if (NumSignBits0 * 2 >=
DL->getTypeSizeInBits(VL0->getOperand(0)->getType()))
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
unsigned NumSignBits1 =
ComputeNumSignBits(VL0->getOperand(1), *DL, 0, AC, nullptr, DT);
if (NumSignBits1 * 2 >=
DL->getTypeSizeInBits(VL0->getOperand(1)->getType()))
ExtraBitWidthNodes.insert(getOperandEntry(TE, 1)->Idx);
}
return;
}
case Instruction::Select:
case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
TE->setOperandsInOrder();
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
case Instruction::GetElementPtr: {
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
SmallVector<ValueList, 2> Operands(2);
// Prepare the operand vector for pointer operands.
for (Value *V : VL) {
auto *GEP = dyn_cast<GetElementPtrInst>(V);
if (!GEP) {
Operands.front().push_back(V);
continue;
}
Operands.front().push_back(GEP->getPointerOperand());
}
TE->setOperand(0, Operands.front());
// Need to cast all indices to the same type before vectorization to
// avoid a crash.
// Also required to be able to find correct matches between different
// gather nodes and reuse the vectorized values rather than trying to
// gather them again.
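// E.g. (an illustrative sketch): if one lane's GEP index is i32 and
// another's is i64, the common type Ty below falls back to the target's
// pointer index type, and constant indices are folded to it.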
int IndexIdx = 1;
Type *VL0Ty = VL0->getOperand(IndexIdx)->getType();
Type *Ty = all_of(VL,
[VL0Ty, IndexIdx](Value *V) {
auto *GEP = dyn_cast<GetElementPtrInst>(V);
if (!GEP)
return true;
return VL0Ty == GEP->getOperand(IndexIdx)->getType();
})
? VL0Ty
: DL->getIndexType(cast<GetElementPtrInst>(VL0)
->getPointerOperandType()
->getScalarType());
// Prepare the operand vector.
for (Value *V : VL) {
auto *I = dyn_cast<GetElementPtrInst>(V);
if (!I) {
Operands.back().push_back(
ConstantInt::get(Ty, 0, /*isSigned=*/false));
continue;
}
auto *Op = I->getOperand(IndexIdx);
auto *CI = dyn_cast<ConstantInt>(Op);
if (!CI)
Operands.back().push_back(Op);
else
Operands.back().push_back(ConstantFoldIntegerCast(
CI, Ty, CI->getValue().isSignBitSet(), *DL));
}
TE->setOperand(IndexIdx, Operands.back());
for (unsigned I = 0, Ops = Operands.size(); I < Ops; ++I)
buildTree_rec(Operands[I], Depth + 1, {TE, I});
return;
}
case Instruction::Store: {
bool Consecutive = CurrentOrder.empty();
if (!Consecutive)
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices, CurrentOrder);
TE->setOperandsInOrder();
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
if (Consecutive)
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
else
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
return;
}
case Instruction::Call: {
// Check if the calls are all to the same vectorizable intrinsic or
// library function.
CallInst *CI = cast<CallInst>(VL0);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
if (isCommutative(VL0)) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
TE->setOperand(0, Left);
TE->setOperand(1, Right);
SmallVector<ValueList> Operands;
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
Operands.emplace_back();
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
continue;
for (Value *V : VL) {
auto *CI2 = cast<CallInst>(V);
Operands.back().push_back(CI2->getArgOperand(I));
}
TE->setOperand(I, Operands.back());
}
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
if (Operands[I - 2].empty())
continue;
buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
}
return;
}
TE->setOperandsInOrder();
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
// For scalar operands there is no need to create an entry since they do
// not need to be vectorized.
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
continue;
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL) {
auto *CI2 = cast<CallInst>(V);
Operands.push_back(CI2->getArgOperand(I));
}
buildTree_rec(Operands, Depth + 1, {TE, I});
}
return;
}
case Instruction::ShuffleVector: {
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
auto *CI = dyn_cast<CmpInst>(VL0);
if (isa<BinaryOperator>(VL0) || CI) {
ValueList Left, Right;
if (!CI || all_of(VL, [](Value *V) {
return cast<CmpInst>(V)->isCommutative();
})) {
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
} else {
auto *MainCI = cast<CmpInst>(S.MainOp);
auto *AltCI = cast<CmpInst>(S.AltOp);
CmpInst::Predicate MainP = MainCI->getPredicate();
CmpInst::Predicate AltP = AltCI->getPredicate();
assert(MainP != AltP &&
"Expected different main/alternate predicates.");
// Collect operands - commute if it uses the swapped predicate or
// alternate operation.
for (Value *V : VL) {
auto *Cmp = cast<CmpInst>(V);
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
std::swap(LHS, RHS);
} else {
if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
std::swap(LHS, RHS);
}
Left.push_back(LHS);
Right.push_back(RHS);
}
}
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
TE->setOperandsInOrder();
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
return;
}
default:
break;
}
llvm_unreachable("Unexpected vectorization of the instructions.");
}
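// (An illustrative sketch of the mapping below:) canMapToVector flattens
// homogeneous aggregates to a flat element count, e.g. a struct of four
// floats maps to <4 x float> (N = 4), provided the widened type passes the
// Min/MaxVecRegSize and store-size checks; a struct mixing float and i32
// returns 0.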
unsigned BoUpSLP::canMapToVector(Type *T) const {
unsigned N = 1;
Type *EltTy = T;
while (isa<StructType, ArrayType, FixedVectorType>(EltTy)) {
if (auto *ST = dyn_cast<StructType>(EltTy)) {
// Check that struct is homogeneous.
for (const auto *Ty : ST->elements())
if (Ty != *ST->element_begin())
return 0;
N *= ST->getNumElements();
EltTy = *ST->element_begin();
} else if (auto *AT = dyn_cast<ArrayType>(EltTy)) {
N *= AT->getNumElements();
EltTy = AT->getElementType();
} else {
auto *VT = cast<FixedVectorType>(EltTy);
N *= VT->getNumElements();
EltTy = VT->getElementType();
}
}
if (!isValidElementType(EltTy))
return 0;
uint64_t VTSize = DL->getTypeStoreSizeInBits(getWidenedType(EltTy, N));
if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize ||
VTSize != DL->getTypeStoreSizeInBits(T))
return 0;
return N;
}
bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
SmallVectorImpl<unsigned> &CurrentOrder,
bool ResizeAllowed) const {
const auto *It = find_if(VL, IsaPred<ExtractElementInst, ExtractValueInst>);
assert(It != VL.end() && "Expected at least one extract instruction.");
auto *E0 = cast<Instruction>(*It);
assert(
all_of(VL, IsaPred<UndefValue, ExtractElementInst, ExtractValueInst>) &&
"Invalid opcode");
// Check if all of the extracts come from the same vector and from the
// correct offset.
Value *Vec = E0->getOperand(0);
CurrentOrder.clear();
// We have to extract from a vector/aggregate with the same number of elements.
unsigned NElts;
if (E0->getOpcode() == Instruction::ExtractValue) {
NElts = canMapToVector(Vec->getType());
if (!NElts)
return false;
// Check if load can be rewritten as load of vector.
LoadInst *LI = dyn_cast<LoadInst>(Vec);
if (!LI || !LI->isSimple() || !LI->hasNUses(VL.size()))
return false;
} else {
NElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
}
unsigned E = VL.size();
if (!ResizeAllowed && NElts != E)
return false;
SmallVector<int> Indices(E, PoisonMaskElem);
unsigned MinIdx = NElts, MaxIdx = 0;
for (auto [I, V] : enumerate(VL)) {
auto *Inst = dyn_cast<Instruction>(V);
if (!Inst)
continue;
if (Inst->getOperand(0) != Vec)
return false;
if (auto *EE = dyn_cast<ExtractElementInst>(Inst))
if (isa<UndefValue>(EE->getIndexOperand()))
continue;
std::optional<unsigned> Idx = getExtractIndex(Inst);
if (!Idx)
return false;
const unsigned ExtIdx = *Idx;
if (ExtIdx >= NElts)
continue;
Indices[I] = ExtIdx;
if (MinIdx > ExtIdx)
MinIdx = ExtIdx;
if (MaxIdx < ExtIdx)
MaxIdx = ExtIdx;
}
if (MaxIdx - MinIdx + 1 > E)
return false;
if (MaxIdx + 1 <= E)
MinIdx = 0;
// Check that all of the indices extract from the correct offset.
bool ShouldKeepOrder = true;
// Assign to all items the initial value E so we can check if the extract
// instruction index was used already.
// Also, later we can check that all the indices are used and we have a
// consecutive access in the extract instructions, by checking that no
// element of CurrentOrder still has value E.
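// E.g. (an illustrative sketch): extracts at indices 2, 3, 0, 1 of a
// 4-element vector produce CurrentOrder = {2, 3, 0, 1} and return false;
// extracts at 0, 1, 2, 3 clear CurrentOrder and return true.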
CurrentOrder.assign(E, E);
for (unsigned I = 0; I < E; ++I) {
if (Indices[I] == PoisonMaskElem)
continue;
const unsigned ExtIdx = Indices[I] - MinIdx;
if (CurrentOrder[ExtIdx] != E) {
CurrentOrder.clear();
return false;
}
ShouldKeepOrder &= ExtIdx == I;
CurrentOrder[ExtIdx] = I;
}
if (ShouldKeepOrder)
CurrentOrder.clear();
return ShouldKeepOrder;
}
bool BoUpSLP::areAllUsersVectorized(
Instruction *I, const SmallDenseSet<Value *> *VectorizedVals) const {
return (I->hasOneUse() && (!VectorizedVals || VectorizedVals->contains(I))) ||
all_of(I->users(), [this](User *U) {
return ScalarToTreeEntry.contains(U) ||
isVectorLikeInstWithConstOps(U) ||
(isa<ExtractElementInst>(U) && MustGather.contains(U));
});
}
static std::pair<InstructionCost, InstructionCost>
getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
ArrayRef<Type *> ArgTys) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
FastMathFlags FMF;
if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
FMF = FPCI->getFastMathFlags();
SmallVector<const Value *> Arguments(CI->args());
IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, ArgTys, FMF,
dyn_cast<IntrinsicInst>(CI));
auto IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
auto Shape = VFShape::get(CI->getFunctionType(),
ElementCount::getFixed(VecTy->getNumElements()),
false /*HasGlobalPred*/);
Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
auto LibCost = IntrinsicCost;
if (!CI->isNoBuiltin() && VecFunc) {
// Calculate the cost of the vector library call.
// If the corresponding vector call is cheaper, return its cost.
LibCost =
TTI->getCallInstrCost(nullptr, VecTy, ArgTys, TTI::TCK_RecipThroughput);
}
return {IntrinsicCost, LibCost};
}
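/// (An illustrative sketch, assuming no reorder/reuse indices:) for scalars
/// {add, sub, add, sub} with IsAltOp matching 'sub', the mask built below is
/// <0, 5, 2, 7> - even lanes select from the main-op vector (indices
/// [0, Sz)), odd lanes from the alternate-op vector (indices [Sz, 2 * Sz)).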
void BoUpSLP::TreeEntry::buildAltOpShuffleMask(
const function_ref<bool(Instruction *)> IsAltOp, SmallVectorImpl<int> &Mask,
SmallVectorImpl<Value *> *OpScalars,
SmallVectorImpl<Value *> *AltScalars) const {
unsigned Sz = Scalars.size();
Mask.assign(Sz, PoisonMaskElem);
SmallVector<int> OrderMask;
if (!ReorderIndices.empty())
inversePermutation(ReorderIndices, OrderMask);
for (unsigned I = 0; I < Sz; ++I) {
unsigned Idx = I;
if (!ReorderIndices.empty())
Idx = OrderMask[I];
auto *OpInst = cast<Instruction>(Scalars[Idx]);
if (IsAltOp(OpInst)) {
Mask[I] = Sz + Idx;
if (AltScalars)
AltScalars->push_back(OpInst);
} else {
Mask[I] = Idx;
if (OpScalars)
OpScalars->push_back(OpInst);
}
}
if (!ReuseShuffleIndices.empty()) {
SmallVector<int> NewMask(ReuseShuffleIndices.size(), PoisonMaskElem);
transform(ReuseShuffleIndices, NewMask.begin(), [&Mask](int Idx) {
return Idx != PoisonMaskElem ? Mask[Idx] : PoisonMaskElem;
});
Mask.swap(NewMask);
}
}
static bool isAlternateInstruction(const Instruction *I,
const Instruction *MainOp,
const Instruction *AltOp,
const TargetLibraryInfo &TLI) {
if (auto *MainCI = dyn_cast<CmpInst>(MainOp)) {
auto *AltCI = cast<CmpInst>(AltOp);
CmpInst::Predicate MainP = MainCI->getPredicate();
CmpInst::Predicate AltP = AltCI->getPredicate();
assert(MainP != AltP && "Expected different main/alternate predicates.");
auto *CI = cast<CmpInst>(I);
if (isCmpSameOrSwapped(MainCI, CI, TLI))
return false;
if (isCmpSameOrSwapped(AltCI, CI, TLI))
return true;
CmpInst::Predicate P = CI->getPredicate();
CmpInst::Predicate SwappedP = CmpInst::getSwappedPredicate(P);
assert((MainP == P || AltP == P || MainP == SwappedP || AltP == SwappedP) &&
"CmpInst expected to match either main or alternate predicate or "
"their swap.");
(void)AltP;
return MainP != P && MainP != SwappedP;
}
return I->getOpcode() == AltOp->getOpcode();
}
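// (Illustrative) e.g. Ops = {8, 8, 8, 8} yields {OK_UniformConstantValue,
// OP_PowerOf2}; Ops = {3, 5, 7, 9} yields {OK_NonUniformConstantValue,
// OP_None}; equal non-constant values yield {OK_UniformValue, OP_None}.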
TTI::OperandValueInfo BoUpSLP::getOperandInfo(ArrayRef<Value *> Ops) {
assert(!Ops.empty());
const auto *Op0 = Ops.front();
const bool IsConstant = all_of(Ops, [](Value *V) {
// TODO: We should allow undef elements here
return isConstant(V) && !isa<UndefValue>(V);
});
const bool IsUniform = all_of(Ops, [=](Value *V) {
// TODO: We should allow undef elements here
return V == Op0;
});
const bool IsPowerOfTwo = all_of(Ops, [](Value *V) {
// TODO: We should allow undef elements here
if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isPowerOf2();
return false;
});
const bool IsNegatedPowerOfTwo = all_of(Ops, [](Value *V) {
// TODO: We should allow undef elements here
if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isNegatedPowerOf2();
return false;
});
TTI::OperandValueKind VK = TTI::OK_AnyValue;
if (IsConstant && IsUniform)
VK = TTI::OK_UniformConstantValue;
else if (IsConstant)
VK = TTI::OK_NonUniformConstantValue;
else if (IsUniform)
VK = TTI::OK_UniformValue;
TTI::OperandValueProperties VP = TTI::OP_None;
VP = IsPowerOfTwo ? TTI::OP_PowerOf2 : VP;
VP = IsNegatedPowerOfTwo ? TTI::OP_NegatedPowerOf2 : VP;
return {VK, VP};
}
namespace {
/// The base class for shuffle instruction emission and shuffle cost estimation.
class BaseShuffleAnalysis {
protected:
/// Checks if the mask is an identity mask.
/// \param IsStrict if true, the function returns false if the mask size does
/// not match the vector size.
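/// E.g. (a sketch): for VF = 4, <0, 1, 2, 3> is an identity mask in both
/// modes, while <0, 1, 2, 3, 0, 1, 2, 3> only qualifies when IsStrict is
/// false (every VF-sized submask is an identity).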
static bool isIdentityMask(ArrayRef<int> Mask, const FixedVectorType *VecTy,
bool IsStrict) {
int Limit = Mask.size();
int VF = VecTy->getNumElements();
int Index = -1;
if (VF == Limit && ShuffleVectorInst::isIdentityMask(Mask, Limit))
return true;
if (!IsStrict) {
// Consider extract subvector starting from index 0.
if (ShuffleVectorInst::isExtractSubvectorMask(Mask, VF, Index) &&
Index == 0)
return true;
// All VF-size submasks are identity (e.g.
// <poison,poison,poison,poison,0,1,2,poison,poison,1,2,3> etc. for VF 4).
if (Limit % VF == 0 && all_of(seq<int>(0, Limit / VF), [=](int Idx) {
ArrayRef<int> Slice = Mask.slice(Idx * VF, VF);
return all_of(Slice, [](int I) { return I == PoisonMaskElem; }) ||
ShuffleVectorInst::isIdentityMask(Slice, VF);
}))
return true;
}
return false;
}
/// Tries to combine 2 different masks into a single one.
/// \param LocalVF Vector length of the permuted input vector. \p Mask may
/// change the size of the vector, \p LocalVF is the original size of the
/// shuffled vector.
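/// E.g. (a sketch): with LocalVF = 2, Mask = <1, 0> and
/// ExtMask = <0, 1, 0, 1>, the combined mask is <1, 0, 1, 0> - each ExtMask
/// element indexes into Mask (modulo Mask's size) and the selected value is
/// reduced modulo LocalVF.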
static void combineMasks(unsigned LocalVF, SmallVectorImpl<int> &Mask,
ArrayRef<int> ExtMask) {
unsigned VF = Mask.size();
SmallVector<int> NewMask(ExtMask.size(), PoisonMaskElem);
for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) {
if (ExtMask[I] == PoisonMaskElem)
continue;
int MaskedIdx = Mask[ExtMask[I] % VF];
NewMask[I] =
MaskedIdx == PoisonMaskElem ? PoisonMaskElem : MaskedIdx % LocalVF;
}
Mask.swap(NewMask);
}
/// Looks through shuffles trying to reduce the final number of shuffles in
/// the code. The function looks through the previously emitted shuffle
/// instructions and properly marks indices in the mask as undef.
/// For example, given the code
/// \code
/// %s1 = shufflevector <2 x ty> %0, poison, <1, 0>
/// %s2 = shufflevector <2 x ty> %1, poison, <1, 0>
/// \endcode
/// and if we need to emit a shuffle of %s1 and %s2 with mask <1, 0, 3, 2>,
/// it will look through %s1 and %s2 and select vectors %0 and %1 with mask
/// <0, 1, 2, 3> for the shuffle.
/// If 2 operands are of different size, the smallest one will be resized and
/// the mask recalculated properly.
/// For example, given the code
/// \code
/// %s1 = shufflevector <2 x ty> %0, poison, <1, 0, 1, 0>
/// %s2 = shufflevector <2 x ty> %1, poison, <1, 0, 1, 0>
/// \endcode
/// and if we need to emit a shuffle of %s1 and %s2 with mask <1, 0, 5, 4>,
/// it will look through %s1 and %s2 and select vectors %0 and %1 with mask
/// <0, 1, 2, 3> for the shuffle.
/// So, it tries to transform permutations to simple vector merge, if
/// possible.
/// \param V The input vector which must be shuffled using the given \p Mask.
/// If the better candidate is found, \p V is set to this best candidate
/// vector.
/// \param Mask The input mask for the shuffle. If the best candidate is found
/// during looking-through-shuffles attempt, it is updated accordingly.
/// \param SinglePermute true if the shuffle operation is originally a
/// single-value-permutation. In this case the look-through-shuffles procedure
/// may look for resizing shuffles as the best candidates.
/// \return true if the shuffle results in the non-resizing identity shuffle
/// (and thus can be ignored), false otherwise.
static bool peekThroughShuffles(Value *&V, SmallVectorImpl<int> &Mask,
bool SinglePermute) {
Value *Op = V;
ShuffleVectorInst *IdentityOp = nullptr;
SmallVector<int> IdentityMask;
while (auto *SV = dyn_cast<ShuffleVectorInst>(Op)) {
// Exit if not a fixed vector type or changing size shuffle.
auto *SVTy = dyn_cast<FixedVectorType>(SV->getType());
if (!SVTy)
break;
// Remember the identity or broadcast mask, if it is not a resizing
// shuffle. If no better candidates are found, this Op and Mask will be
// used in the final shuffle.
if (isIdentityMask(Mask, SVTy, /*IsStrict=*/false)) {
if (!IdentityOp || !SinglePermute ||
(isIdentityMask(Mask, SVTy, /*IsStrict=*/true) &&
!ShuffleVectorInst::isZeroEltSplatMask(IdentityMask,
IdentityMask.size()))) {
IdentityOp = SV;
// Store the current mask in IdentityMask so that we do not lose this
// info later if IdentityOp is selected as the best candidate for the
// permutation.
IdentityMask.assign(Mask);
}
}
// Remember the broadcast mask. If no better candidates are found, this Op
// and Mask will be used in the final shuffle.
// Zero splat can be used as identity too, since it might be used with
// mask <0, 1, 2, ...>, i.e. identity mask without extra reshuffling.
// E.g. if we need to shuffle the vector with the mask <3, 1, 2, 0>, which
// is expensive, and the analysis finds out that the source vector is just
// a broadcast, the original mask can be transformed to the identity mask
// <0, 1, 2, 3>.
// \code
// %0 = shuffle %v, poison, zeroinitializer
// %res = shuffle %0, poison, <3, 1, 2, 0>
// \endcode
// may be transformed to
// \code
// %0 = shuffle %v, poison, zeroinitializer
// %res = shuffle %0, poison, <0, 1, 2, 3>
// \endcode
if (SV->isZeroEltSplat()) {
IdentityOp = SV;
IdentityMask.assign(Mask);
}
int LocalVF = Mask.size();
if (auto *SVOpTy =
dyn_cast<FixedVectorType>(SV->getOperand(0)->getType()))
LocalVF = SVOpTy->getNumElements();
SmallVector<int> ExtMask(Mask.size(), PoisonMaskElem);
for (auto [Idx, I] : enumerate(Mask)) {
if (I == PoisonMaskElem ||
static_cast<unsigned>(I) >= SV->getShuffleMask().size())
continue;
ExtMask[Idx] = SV->getMaskValue(I);
}
bool IsOp1Undef =
isUndefVector(SV->getOperand(0),
buildUseMask(LocalVF, ExtMask, UseMask::FirstArg))
.all();
bool IsOp2Undef =
isUndefVector(SV->getOperand(1),
buildUseMask(LocalVF, ExtMask, UseMask::SecondArg))
.all();
if (!IsOp1Undef && !IsOp2Undef) {
// Update mask and mark undef elems.
for (int &I : Mask) {
if (I == PoisonMaskElem)
continue;
if (SV->getMaskValue(I % SV->getShuffleMask().size()) ==
PoisonMaskElem)
I = PoisonMaskElem;
}
break;
}
SmallVector<int> ShuffleMask(SV->getShuffleMask().begin(),
SV->getShuffleMask().end());
combineMasks(LocalVF, ShuffleMask, Mask);
Mask.swap(ShuffleMask);
if (IsOp2Undef)
Op = SV->getOperand(0);
else
Op = SV->getOperand(1);
}
if (auto *OpTy = dyn_cast<FixedVectorType>(Op->getType());
!OpTy || !isIdentityMask(Mask, OpTy, SinglePermute) ||
ShuffleVectorInst::isZeroEltSplatMask(Mask, Mask.size())) {
if (IdentityOp) {
V = IdentityOp;
assert(Mask.size() == IdentityMask.size() &&
"Expected masks of same sizes.");
// Clear known poison elements.
for (auto [I, Idx] : enumerate(Mask))
if (Idx == PoisonMaskElem)
IdentityMask[I] = PoisonMaskElem;
Mask.swap(IdentityMask);
auto *Shuffle = dyn_cast<ShuffleVectorInst>(V);
return SinglePermute &&
(isIdentityMask(Mask, cast<FixedVectorType>(V->getType()),
/*IsStrict=*/true) ||
(Shuffle && Mask.size() == Shuffle->getShuffleMask().size() &&
Shuffle->isZeroEltSplat() &&
ShuffleVectorInst::isZeroEltSplatMask(Mask, Mask.size())));
}
V = Op;
return false;
}
V = Op;
return true;
}
/// Smart shuffle instruction emission, walks through the shuffle trees and
/// tries to find the best matching vector for the actual shuffle
/// instruction.
template <typename T, typename ShuffleBuilderTy>
static T createShuffle(Value *V1, Value *V2, ArrayRef<int> Mask,
ShuffleBuilderTy &Builder) {
assert(V1 && "Expected at least one vector value.");
if (V2)
Builder.resizeToMatch(V1, V2);
int VF = Mask.size();
if (auto *FTy = dyn_cast<FixedVectorType>(V1->getType()))
VF = FTy->getNumElements();
if (V2 &&
!isUndefVector(V2, buildUseMask(VF, Mask, UseMask::SecondArg)).all()) {
// Peek through shuffles.
Value *Op1 = V1;
Value *Op2 = V2;
int VF =
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
SmallVector<int> CombinedMask1(Mask.size(), PoisonMaskElem);
SmallVector<int> CombinedMask2(Mask.size(), PoisonMaskElem);
for (int I = 0, E = Mask.size(); I < E; ++I) {
if (Mask[I] < VF)
CombinedMask1[I] = Mask[I];
else
CombinedMask2[I] = Mask[I] - VF;
}
Value *PrevOp1;
Value *PrevOp2;
do {
PrevOp1 = Op1;
PrevOp2 = Op2;
(void)peekThroughShuffles(Op1, CombinedMask1, /*SinglePermute=*/false);
(void)peekThroughShuffles(Op2, CombinedMask2, /*SinglePermute=*/false);
// Check if we have 2 resizing shuffles - need to peek through operands
// again.
if (auto *SV1 = dyn_cast<ShuffleVectorInst>(Op1))
if (auto *SV2 = dyn_cast<ShuffleVectorInst>(Op2)) {
SmallVector<int> ExtMask1(Mask.size(), PoisonMaskElem);
for (auto [Idx, I] : enumerate(CombinedMask1)) {
if (I == PoisonMaskElem)
continue;
ExtMask1[Idx] = SV1->getMaskValue(I);
}
SmallBitVector UseMask1 = buildUseMask(
cast<FixedVectorType>(SV1->getOperand(1)->getType())
->getNumElements(),
ExtMask1, UseMask::SecondArg);
SmallVector<int> ExtMask2(CombinedMask2.size(), PoisonMaskElem);
for (auto [Idx, I] : enumerate(CombinedMask2)) {
if (I == PoisonMaskElem)
continue;
ExtMask2[Idx] = SV2->getMaskValue(I);
}
SmallBitVector UseMask2 = buildUseMask(
cast<FixedVectorType>(SV2->getOperand(1)->getType())
->getNumElements(),
ExtMask2, UseMask::SecondArg);
if (SV1->getOperand(0)->getType() ==
SV2->getOperand(0)->getType() &&
SV1->getOperand(0)->getType() != SV1->getType() &&
isUndefVector(SV1->getOperand(1), UseMask1).all() &&
isUndefVector(SV2->getOperand(1), UseMask2).all()) {
Op1 = SV1->getOperand(0);
Op2 = SV2->getOperand(0);
SmallVector<int> ShuffleMask1(SV1->getShuffleMask().begin(),
SV1->getShuffleMask().end());
int LocalVF = ShuffleMask1.size();
if (auto *FTy = dyn_cast<FixedVectorType>(Op1->getType()))
LocalVF = FTy->getNumElements();
combineMasks(LocalVF, ShuffleMask1, CombinedMask1);
CombinedMask1.swap(ShuffleMask1);
SmallVector<int> ShuffleMask2(SV2->getShuffleMask().begin(),
SV2->getShuffleMask().end());
LocalVF = ShuffleMask2.size();
if (auto *FTy = dyn_cast<FixedVectorType>(Op2->getType()))
LocalVF = FTy->getNumElements();
combineMasks(LocalVF, ShuffleMask2, CombinedMask2);
CombinedMask2.swap(ShuffleMask2);
}
}
} while (PrevOp1 != Op1 || PrevOp2 != Op2);
Builder.resizeToMatch(Op1, Op2);
VF = std::max(cast<VectorType>(Op1->getType())
->getElementCount()
.getKnownMinValue(),
cast<VectorType>(Op2->getType())
->getElementCount()
.getKnownMinValue());
for (int I = 0, E = Mask.size(); I < E; ++I) {
if (CombinedMask2[I] != PoisonMaskElem) {
assert(CombinedMask1[I] == PoisonMaskElem &&
"Expected undefined mask element");
CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF);
}
}
if (Op1 == Op2 &&
(ShuffleVectorInst::isIdentityMask(CombinedMask1, VF) ||
(ShuffleVectorInst::isZeroEltSplatMask(CombinedMask1, VF) &&
isa<ShuffleVectorInst>(Op1) &&
cast<ShuffleVectorInst>(Op1)->getShuffleMask() ==
ArrayRef(CombinedMask1))))
return Builder.createIdentity(Op1);
return Builder.createShuffleVector(
Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
CombinedMask1);
}
if (isa<PoisonValue>(V1))
return Builder.createPoison(
cast<VectorType>(V1->getType())->getElementType(), Mask.size());
SmallVector<int> NewMask(Mask.begin(), Mask.end());
bool IsIdentity = peekThroughShuffles(V1, NewMask, /*SinglePermute=*/true);
assert(V1 && "Expected non-null value after looking through shuffles.");
if (!IsIdentity)
return Builder.createShuffleVector(V1, NewMask);
return Builder.createIdentity(V1);
}
};
} // namespace
/// Returns the cost of the shuffle instructions with the given \p Kind, vector
/// type \p Tp and optional \p Mask. Adds SLP-specific cost estimation for the
/// insert subvector pattern.
static InstructionCost
getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
int Index = 0, VectorType *SubTp = nullptr,
ArrayRef<const Value *> Args = std::nullopt) {
if (Kind != TTI::SK_PermuteTwoSrc)
return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
int NumSrcElts = Tp->getElementCount().getKnownMinValue();
int NumSubElts;
if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
Mask, NumSrcElts, NumSubElts, Index)) {
if (Index + NumSubElts > NumSrcElts &&
Index + NumSrcElts <= static_cast<int>(Mask.size()))
return TTI.getShuffleCost(
TTI::SK_InsertSubvector,
getWidenedType(Tp->getElementType(), Mask.size()), Mask,
TTI::TCK_RecipThroughput, Index, Tp);
}
return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
}
/// Calculate the scalar and the vector costs from vectorizing a set of GEPs.
static std::pair<InstructionCost, InstructionCost>
getGEPCosts(const TargetTransformInfo &TTI, ArrayRef<Value *> Ptrs,
Value *BasePtr, unsigned Opcode, TTI::TargetCostKind CostKind,
Type *ScalarTy, VectorType *VecTy) {
InstructionCost ScalarCost = 0;
InstructionCost VecCost = 0;
// Here we differentiate two cases: (1) when Ptrs represent a regular
// vectorization tree node (as they are pointer arguments of scattered
// loads) or (2) when Ptrs are the arguments of loads or stores being
// vectorized as a plain wide unit-stride load/store since all the
// loads/stores are known to be from/to adjacent locations.
if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
// Case 2: estimate costs for pointer related costs when vectorizing to
// a wide load/store.
// Scalar cost is estimated as a set of pointers with known relationship
// between them.
// For vector code we will use BasePtr as argument for the wide load/store
// but we also need to account all the instructions which are going to
// stay in vectorized code due to uses outside of these scalar
// loads/stores.
ScalarCost = TTI.getPointersChainCost(
Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy,
CostKind);
SmallVector<const Value *> PtrsRetainedInVecCode;
for (Value *V : Ptrs) {
if (V == BasePtr) {
PtrsRetainedInVecCode.push_back(V);
continue;
}
auto *Ptr = dyn_cast<GetElementPtrInst>(V);
// For simplicity assume Ptr stays in vectorized code if it's not a
// GEP instruction. We don't care since its cost is considered free.
// TODO: We should check for any uses outside of vectorizable tree
// rather than just single use.
if (!Ptr || !Ptr->hasOneUse())
PtrsRetainedInVecCode.push_back(V);
}
if (PtrsRetainedInVecCode.size() == Ptrs.size()) {
// If all pointers stay in vectorized code then we don't have
// any savings on that.
return std::make_pair(TTI::TCC_Free, TTI::TCC_Free);
}
VecCost = TTI.getPointersChainCost(PtrsRetainedInVecCode, BasePtr,
TTI::PointersChainInfo::getKnownStride(),
VecTy, CostKind);
} else {
// Case 1: Ptrs are the arguments of loads that we are going to transform
// into masked gather load intrinsic.
// All the scalar GEPs will be removed as a result of vectorization.
// For any external uses of some lanes extract element instructions will
// be generated (which cost is estimated separately).
TTI::PointersChainInfo PtrsInfo =
all_of(Ptrs,
[](const Value *V) {
auto *Ptr = dyn_cast<GetElementPtrInst>(V);
return Ptr && !Ptr->hasAllConstantIndices();
})
? TTI::PointersChainInfo::getUnknownStride()
: TTI::PointersChainInfo::getKnownStride();
ScalarCost =
TTI.getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy, CostKind);
auto *BaseGEP = dyn_cast<GEPOperator>(BasePtr);
if (!BaseGEP) {
auto *It = find_if(Ptrs, IsaPred<GEPOperator>);
if (It != Ptrs.end())
BaseGEP = cast<GEPOperator>(*It);
}
if (BaseGEP) {
SmallVector<const Value *> Indices(BaseGEP->indices());
VecCost = TTI.getGEPCost(BaseGEP->getSourceElementType(),
BaseGEP->getPointerOperand(), Indices, VecTy,
CostKind);
}
}
return std::make_pair(ScalarCost, VecCost);
}
void BoUpSLP::transformNodes() {
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
TreeEntry &E = *TE;
switch (E.getOpcode()) {
case Instruction::Load: {
// No need to reorder masked gather loads, just reorder the scalar
// operands.
if (E.State != TreeEntry::Vectorize)
break;
Type *ScalarTy = E.getMainOp()->getType();
auto *VecTy = getWidenedType(ScalarTy, E.Scalars.size());
Align CommonAlignment = computeCommonAlignment<LoadInst>(E.Scalars);
// Check if profitable to represent consecutive load + reverse as strided
// load with stride -1.
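// E.g. (an illustrative sketch): loads of a[3], a[2], a[1], a[0] can become
// a single strided load at &a[3] with stride -1 instead of a wide load plus
// a reverse shuffle, when the target reports the strided form as cheaper.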
if (isReverseOrder(E.ReorderIndices) &&
TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) {
SmallVector<int> Mask;
inversePermutation(E.ReorderIndices, Mask);
auto *BaseLI = cast<LoadInst>(E.Scalars.back());
InstructionCost OriginalVecCost =
TTI->getMemoryOpCost(Instruction::Load, VecTy, BaseLI->getAlign(),
BaseLI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
Instruction::Load, VecTy, BaseLI->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind, BaseLI);
if (StridedCost < OriginalVecCost)
// Strided load is more profitable than consecutive load + reverse -
// transform the node to strided load.
E.State = TreeEntry::StridedVectorize;
}
break;
}
case Instruction::Store: {
Type *ScalarTy =
cast<StoreInst>(E.getMainOp())->getValueOperand()->getType();
auto *VecTy = getWidenedType(ScalarTy, E.Scalars.size());
Align CommonAlignment = computeCommonAlignment<StoreInst>(E.Scalars);
// Check if profitable to represent consecutive store + reverse as strided
// store with stride -1.
if (isReverseOrder(E.ReorderIndices) &&
TTI->isLegalStridedLoadStore(VecTy, CommonAlignment)) {
SmallVector<int> Mask;
inversePermutation(E.ReorderIndices, Mask);
auto *BaseSI = cast<StoreInst>(E.Scalars.back());
InstructionCost OriginalVecCost =
TTI->getMemoryOpCost(Instruction::Store, VecTy, BaseSI->getAlign(),
BaseSI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
Instruction::Store, VecTy, BaseSI->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind, BaseSI);
if (StridedCost < OriginalVecCost)
// Strided store is more profitable than reverse + consecutive store -
// transform the node to a strided store.
E.State = TreeEntry::StridedVectorize;
}
break;
}
default:
break;
}
}
}
/// Merges shuffle masks and emits final shuffle instruction, if required. It
/// supports shuffling of 2 input vectors. It implements lazy shuffle
/// emission: the actual shuffle instruction is generated only if it is
/// required. Otherwise, the shuffle instruction emission is delayed till the
/// end of the process, to reduce the number of emitted instructions and further
/// analysis/transformations.
class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
bool IsFinalized = false;
SmallVector<int> CommonMask;
SmallVector<PointerUnion<Value *, const TreeEntry *>, 2> InVectors;
Type *ScalarTy = nullptr;
const TargetTransformInfo &TTI;
InstructionCost Cost = 0;
SmallDenseSet<Value *> VectorizedVals;
BoUpSLP &R;
SmallPtrSetImpl<Value *> &CheckedExtracts;
constexpr static TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
/// While set, we are still trying to estimate the cost for the same nodes
/// and can delay the actual cost estimation (virtual shuffle instruction
/// emission). This may help to better estimate the cost if the same nodes
/// must be permuted, and allows moving most of the long-shuffle cost
/// estimation to TTI.
bool SameNodesEstimated = true;
static Constant *getAllOnesValue(const DataLayout &DL, Type *Ty) {
if (Ty->getScalarType()->isPointerTy()) {
Constant *Res = ConstantExpr::getIntToPtr(
ConstantInt::getAllOnesValue(
IntegerType::get(Ty->getContext(),
DL.getTypeStoreSizeInBits(Ty->getScalarType()))),
Ty->getScalarType());
if (auto *VTy = dyn_cast<VectorType>(Ty))
Res = ConstantVector::getSplat(VTy->getElementCount(), Res);
return Res;
}
return Constant::getAllOnesValue(Ty);
}
InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
if ((!Root && allConstant(VL)) || all_of(VL, IsaPred<UndefValue>))
return TTI::TCC_Free;
auto *VecTy = getWidenedType(ScalarTy, VL.size());
InstructionCost GatherCost = 0;
SmallVector<Value *> Gathers(VL.begin(), VL.end());
// Improve gather cost for gather of loads, if we can group some of the
// loads into vector loads.
InstructionsState S = getSameOpcode(VL, *R.TLI);
const unsigned Sz = R.DL->getTypeSizeInBits(ScalarTy);
unsigned MinVF = R.getMinVF(2 * Sz);
if (VL.size() > 2 &&
((S.getOpcode() == Instruction::Load && !S.isAltShuffle()) ||
(InVectors.empty() &&
any_of(seq<unsigned>(0, VL.size() / MinVF),
[&](unsigned Idx) {
ArrayRef<Value *> SubVL = VL.slice(Idx * MinVF, MinVF);
InstructionsState S = getSameOpcode(SubVL, *R.TLI);
return S.getOpcode() == Instruction::Load &&
!S.isAltShuffle();
}))) &&
!all_of(Gathers, [&](Value *V) { return R.getTreeEntry(V); }) &&
!isSplat(Gathers)) {
InstructionCost BaseCost = R.getGatherCost(Gathers, !Root, ScalarTy);
SetVector<Value *> VectorizedLoads;
SmallVector<std::pair<unsigned, LoadsState>> VectorizedStarts;
SmallVector<unsigned> ScatterVectorized;
unsigned StartIdx = 0;
unsigned VF = VL.size() / 2;
for (; VF >= MinVF; VF /= 2) {
for (unsigned Cnt = StartIdx, End = VL.size(); Cnt + VF <= End;
Cnt += VF) {
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
if (S.getOpcode() != Instruction::Load || S.isAltShuffle()) {
InstructionsState SliceS = getSameOpcode(Slice, *R.TLI);
if (SliceS.getOpcode() != Instruction::Load ||
SliceS.isAltShuffle())
continue;
}
if (!VectorizedLoads.count(Slice.front()) &&
!VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
SmallVector<Value *> PointerOps;
OrdersType CurrentOrder;
LoadsState LS = R.canVectorizeLoads(Slice, Slice.front(),
CurrentOrder, PointerOps);
switch (LS) {
case LoadsState::Vectorize:
case LoadsState::ScatterVectorize:
case LoadsState::StridedVectorize:
// Mark the vectorized loads so that we don't vectorize them
// again.
// TODO: better handling of loads with reorders.
if (((LS == LoadsState::Vectorize ||
LS == LoadsState::StridedVectorize) &&
CurrentOrder.empty()) ||
(LS == LoadsState::StridedVectorize &&
isReverseOrder(CurrentOrder)))
VectorizedStarts.emplace_back(Cnt, LS);
else
ScatterVectorized.push_back(Cnt);
VectorizedLoads.insert(Slice.begin(), Slice.end());
// If we vectorized initial block, no need to try to vectorize
// it again.
if (Cnt == StartIdx)
StartIdx += VF;
break;
case LoadsState::Gather:
break;
}
}
}
// Check if the whole array was vectorized already - exit.
if (StartIdx >= VL.size())
break;
// Found vectorizable parts - exit.
if (!VectorizedLoads.empty())
break;
}
if (!VectorizedLoads.empty()) {
unsigned NumParts = TTI.getNumberOfParts(VecTy);
bool NeedInsertSubvectorAnalysis =
!NumParts || (VL.size() / VF) > NumParts;
// Get the cost for gathered loads.
for (unsigned I = 0, End = VL.size(); I < End; I += VF) {
if (VectorizedLoads.contains(VL[I]))
continue;
GatherCost +=
getBuildVectorCost(VL.slice(I, std::min(End - I, VF)), Root);
}
// Exclude potentially vectorized loads from list of gathered
// scalars.
Gathers.assign(Gathers.size(), PoisonValue::get(VL.front()->getType()));
// The cost for vectorized loads.
InstructionCost ScalarsCost = 0;
for (Value *V : VectorizedLoads) {
auto *LI = cast<LoadInst>(V);
ScalarsCost +=
TTI.getMemoryOpCost(Instruction::Load, LI->getType(),
LI->getAlign(), LI->getPointerAddressSpace(),
CostKind, TTI::OperandValueInfo(), LI);
}
auto *LoadTy = getWidenedType(VL.front()->getType(), VF);
for (const std::pair<unsigned, LoadsState> &P : VectorizedStarts) {
auto *LI = cast<LoadInst>(VL[P.first]);
Align Alignment = LI->getAlign();
GatherCost +=
P.second == LoadsState::Vectorize
? TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment,
LI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo(), LI)
: TTI.getStridedMemoryOpCost(
Instruction::Load, LoadTy, LI->getPointerOperand(),
/*VariableMask=*/false, Alignment, CostKind, LI);
// Estimate GEP cost.
SmallVector<Value *> PointerOps(VF);
for (auto [I, V] : enumerate(VL.slice(P.first, VF)))
PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();
auto [ScalarGEPCost, VectorGEPCost] =
getGEPCosts(TTI, PointerOps, LI->getPointerOperand(),
Instruction::Load, CostKind, LI->getType(), LoadTy);
GatherCost += VectorGEPCost - ScalarGEPCost;
}
for (unsigned P : ScatterVectorized) {
auto *LI0 = cast<LoadInst>(VL[P]);
ArrayRef<Value *> Slice = VL.slice(P, VF);
Align CommonAlignment = computeCommonAlignment<LoadInst>(Slice);
GatherCost += TTI.getGatherScatterOpCost(
Instruction::Load, LoadTy, LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind, LI0);
// Estimate GEP cost.
SmallVector<Value *> PointerOps(VF);
for (auto [I, V] : enumerate(Slice))
PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();
OrdersType Order;
if (sortPtrAccesses(PointerOps, LI0->getType(), *R.DL, *R.SE,
Order)) {
// TODO: improve checks if GEPs can be vectorized.
Value *Ptr0 = PointerOps.front();
Type *ScalarTy = Ptr0->getType();
auto *VecTy = getWidenedType(ScalarTy, VF);
auto [ScalarGEPCost, VectorGEPCost] =
getGEPCosts(TTI, PointerOps, Ptr0, Instruction::GetElementPtr,
CostKind, ScalarTy, VecTy);
GatherCost += VectorGEPCost - ScalarGEPCost;
if (!Order.empty()) {
SmallVector<int> Mask;
inversePermutation(Order, Mask);
GatherCost += ::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc,
VecTy, Mask, CostKind);
}
} else {
GatherCost += R.getGatherCost(PointerOps, /*ForPoisonSrc=*/true,
PointerOps.front()->getType());
}
}
if (NeedInsertSubvectorAnalysis) {
// Add the cost for the subvectors insert.
SmallVector<int> ShuffleMask(VL.size());
for (unsigned I = VF, E = VL.size(); I < E; I += VF) {
for (unsigned Idx : seq<unsigned>(0, E))
ShuffleMask[Idx] = Idx / VF == I ? E + Idx % VF : Idx;
GatherCost += TTI.getShuffleCost(TTI::SK_InsertSubvector, VecTy,
ShuffleMask, CostKind, I, LoadTy);
}
}
GatherCost -= ScalarsCost;
}
GatherCost = std::min(BaseCost, GatherCost);
} else if (!Root && isSplat(VL)) {
// Found a broadcast of a single scalar - calculate the cost as
// a broadcast.
const auto *It = find_if_not(VL, IsaPred<UndefValue>);
assert(It != VL.end() && "Expected at least one non-undef value.");
// Add broadcast for non-identity shuffle only.
bool NeedShuffle =
count(VL, *It) > 1 &&
(VL.front() != *It || !all_of(VL.drop_front(), IsaPred<UndefValue>));
if (!NeedShuffle)
return TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
CostKind, std::distance(VL.begin(), It),
PoisonValue::get(VecTy), *It);
SmallVector<int> ShuffleMask(VL.size(), PoisonMaskElem);
transform(VL, ShuffleMask.begin(), [](Value *V) {
return isa<PoisonValue>(V) ? PoisonMaskElem : 0;
});
InstructionCost InsertCost =
TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind, 0,
PoisonValue::get(VecTy), *It);
return InsertCost + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast,
VecTy, ShuffleMask, CostKind,
/*Index=*/0, /*SubTp=*/nullptr,
/*Args=*/*It);
}
return GatherCost +
(all_of(Gathers, IsaPred<UndefValue>)
? TTI::TCC_Free
: R.getGatherCost(Gathers, !Root && VL.equals(Gathers),
ScalarTy));
};
/// Compute the cost of creating a vector containing the extracted values from
/// \p VL.
InstructionCost
computeExtractCost(ArrayRef<Value *> VL, ArrayRef<int> Mask,
ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
unsigned NumParts) {
assert(VL.size() > NumParts && "Unexpected scalarized shuffle.");
unsigned NumElts =
std::accumulate(VL.begin(), VL.end(), 0, [](unsigned Sz, Value *V) {
auto *EE = dyn_cast<ExtractElementInst>(V);
if (!EE)
return Sz;
auto *VecTy = dyn_cast<FixedVectorType>(EE->getVectorOperandType());
if (!VecTy)
return Sz;
return std::max(Sz, VecTy->getNumElements());
});
// FIXME: this must be moved to TTI for better estimation.
unsigned EltsPerVector = getPartNumElems(VL.size(), NumParts);
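// (An illustrative sketch of the check below:) with EltsPerVector = 4, a
// sub-mask whose sources span two different registers is re-based to
// per-register indices and classified as SK_PermuteTwoSrc, with the
// starting offsets of the source registers returned in Indices.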
auto CheckPerRegistersShuffle = [&](MutableArrayRef<int> Mask,
SmallVectorImpl<unsigned> &Indices)
-> std::optional<TTI::ShuffleKind> {
if (NumElts <= EltsPerVector)
return std::nullopt;
int OffsetReg0 =
alignDown(std::accumulate(Mask.begin(), Mask.end(), INT_MAX,
[](int S, int I) {
if (I == PoisonMaskElem)
return S;
return std::min(S, I);
}),
EltsPerVector);
int OffsetReg1 = OffsetReg0;
DenseSet<int> RegIndices;
// Check if we are trying to permute the same single or 2 input vectors.
TTI::ShuffleKind ShuffleKind = TTI::SK_PermuteSingleSrc;
int FirstRegId = -1;
Indices.assign(1, OffsetReg0);
for (auto [Pos, I] : enumerate(Mask)) {
if (I == PoisonMaskElem)
continue;
int Idx = I - OffsetReg0;
int RegId =
(Idx / NumElts) * NumParts + (Idx % NumElts) / EltsPerVector;
if (FirstRegId < 0)
FirstRegId = RegId;
RegIndices.insert(RegId);
if (RegIndices.size() > 2)
return std::nullopt;
if (RegIndices.size() == 2) {
ShuffleKind = TTI::SK_PermuteTwoSrc;
if (Indices.size() == 1) {
OffsetReg1 = alignDown(
std::accumulate(
std::next(Mask.begin(), Pos), Mask.end(), INT_MAX,
[&](int S, int I) {
if (I == PoisonMaskElem)
return S;
int RegId = ((I - OffsetReg0) / NumElts) * NumParts +
((I - OffsetReg0) % NumElts) / EltsPerVector;
if (RegId == FirstRegId)
return S;
return std::min(S, I);
}),
EltsPerVector);
Indices.push_back(OffsetReg1 % NumElts);
}
Idx = I - OffsetReg1;
}
I = (Idx % NumElts) % EltsPerVector +
(RegId == FirstRegId ? 0 : EltsPerVector);
}
return ShuffleKind;
};
InstructionCost Cost = 0;
// Process extracts in blocks of EltsPerVector to check if the source vector
// operand can be re-used directly. If not, add the cost of creating a
// shuffle to extract the values into a vector register.
for (unsigned Part : seq<unsigned>(NumParts)) {
if (!ShuffleKinds[Part])
continue;
ArrayRef<int> MaskSlice = Mask.slice(
Part * EltsPerVector, getNumElems(Mask.size(), EltsPerVector, Part));
SmallVector<int> SubMask(EltsPerVector, PoisonMaskElem);
copy(MaskSlice, SubMask.begin());
SmallVector<unsigned, 2> Indices;
std::optional<TTI::ShuffleKind> RegShuffleKind =
CheckPerRegistersShuffle(SubMask, Indices);
if (!RegShuffleKind) {
if (*ShuffleKinds[Part] != TTI::SK_PermuteSingleSrc ||
!ShuffleVectorInst::isIdentityMask(
MaskSlice, std::max<unsigned>(NumElts, MaskSlice.size())))
Cost +=
::getShuffleCost(TTI, *ShuffleKinds[Part],
getWidenedType(ScalarTy, NumElts), MaskSlice);
continue;
}
if (*RegShuffleKind != TTI::SK_PermuteSingleSrc ||
!ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) {
Cost +=
::getShuffleCost(TTI, *RegShuffleKind,
getWidenedType(ScalarTy, EltsPerVector), SubMask);
}
for (unsigned Idx : Indices) {
assert((Idx + EltsPerVector) <= alignTo(NumElts, EltsPerVector) &&
"SK_ExtractSubvector index out of range");
Cost += ::getShuffleCost(
TTI, TTI::SK_ExtractSubvector,
getWidenedType(ScalarTy, alignTo(NumElts, EltsPerVector)),
std::nullopt, CostKind, Idx,
getWidenedType(ScalarTy, EltsPerVector));
}
// Second attempt to check if just a permute is estimated cheaper than the
// subvector extract.
SubMask.assign(NumElts, PoisonMaskElem);
copy(MaskSlice, SubMask.begin());
InstructionCost OriginalCost = ::getShuffleCost(
TTI, *ShuffleKinds[Part], getWidenedType(ScalarTy, NumElts), SubMask);
if (OriginalCost < Cost)
Cost = OriginalCost;
}
return Cost;
}
/// Transforms mask \p CommonMask per given \p Mask to make a proper mask
/// after shuffle emission.
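/// E.g. (a sketch): with Mask = <2, poison, 0, poison>, lanes 0 and 2 of
/// CommonMask become 0 and 2 respectively - each used lane now refers to
/// its own position in the just-emitted shuffle result.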
static void transformMaskAfterShuffle(MutableArrayRef<int> CommonMask,
ArrayRef<int> Mask) {
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (Mask[Idx] != PoisonMaskElem)
CommonMask[Idx] = Idx;
}
/// Adds the cost of reshuffling \p E1 and \p E2 (if present), using the
/// given mask \p Mask and register number \p Part, which includes
/// \p SliceSize elements.
void estimateNodesPermuteCost(const TreeEntry &E1, const TreeEntry *E2,
ArrayRef<int> Mask, unsigned Part,
unsigned SliceSize) {
if (SameNodesEstimated) {
// Delay the cost estimation if the same nodes are reshuffling.
// If we already requested the cost of reshuffling of E1 and E2 before, no
// need to estimate another cost with the sub-Mask, instead include this
// sub-Mask into the CommonMask to estimate it later and avoid double cost
// estimation.
if ((InVectors.size() == 2 &&
InVectors.front().get<const TreeEntry *>() == &E1 &&
InVectors.back().get<const TreeEntry *>() == E2) ||
(!E2 && InVectors.front().get<const TreeEntry *>() == &E1)) {
unsigned Limit = getNumElems(Mask.size(), SliceSize, Part);
assert(all_of(ArrayRef(CommonMask).slice(Part * SliceSize, Limit),
[](int Idx) { return Idx == PoisonMaskElem; }) &&
"Expected all poisoned elements.");
ArrayRef<int> SubMask = ArrayRef(Mask).slice(Part * SliceSize, Limit);
copy(SubMask, std::next(CommonMask.begin(), SliceSize * Part));
return;
}
// Found non-matching nodes - need to estimate the cost for the matched
// and transform mask.
Cost += createShuffle(InVectors.front(),
InVectors.size() == 1 ? nullptr : InVectors.back(),
CommonMask);
transformMaskAfterShuffle(CommonMask, CommonMask);
}
SameNodesEstimated = false;
if (!E2 && InVectors.size() == 1) {
unsigned VF = E1.getVectorFactor();
if (Value *V1 = InVectors.front().dyn_cast<Value *>()) {
VF = std::max(VF,
cast<FixedVectorType>(V1->getType())->getNumElements());
} else {
const auto *E = InVectors.front().get<const TreeEntry *>();
VF = std::max(VF, E->getVectorFactor());
}
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
CommonMask[Idx] = Mask[Idx] + VF;
Cost += createShuffle(InVectors.front(), &E1, CommonMask);
transformMaskAfterShuffle(CommonMask, CommonMask);
} else {
Cost += createShuffle(&E1, E2, Mask);
transformMaskAfterShuffle(CommonMask, Mask);
}
}
class ShuffleCostBuilder {
const TargetTransformInfo &TTI;
static bool isEmptyOrIdentity(ArrayRef<int> Mask, unsigned VF) {
int Index = -1;
return Mask.empty() ||
(VF == Mask.size() &&
ShuffleVectorInst::isIdentityMask(Mask, VF)) ||
(ShuffleVectorInst::isExtractSubvectorMask(Mask, VF, Index) &&
Index == 0);
}
public:
ShuffleCostBuilder(const TargetTransformInfo &TTI) : TTI(TTI) {}
~ShuffleCostBuilder() = default;
InstructionCost createShuffleVector(Value *V1, Value *,
ArrayRef<int> Mask) const {
// Empty mask or identity mask are free.
unsigned VF =
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
if (isEmptyOrIdentity(Mask, VF))
return TTI::TCC_Free;
return ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc,
cast<VectorType>(V1->getType()), Mask);
}
InstructionCost createShuffleVector(Value *V1, ArrayRef<int> Mask) const {
// Empty mask or identity mask are free.
unsigned VF =
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
if (isEmptyOrIdentity(Mask, VF))
return TTI::TCC_Free;
return TTI.getShuffleCost(TTI::SK_PermuteSingleSrc,
cast<VectorType>(V1->getType()), Mask);
}
InstructionCost createIdentity(Value *) const { return TTI::TCC_Free; }
InstructionCost createPoison(Type *Ty, unsigned VF) const {
return TTI::TCC_Free;
}
void resizeToMatch(Value *&, Value *&) const {}
};
/// Smart shuffle instruction emission, walks through the shuffle trees and
/// tries to find the best matching vector for the actual shuffle
/// instruction.
InstructionCost
createShuffle(const PointerUnion<Value *, const TreeEntry *> &P1,
const PointerUnion<Value *, const TreeEntry *> &P2,
ArrayRef<int> Mask) {
ShuffleCostBuilder Builder(TTI);
SmallVector<int> CommonMask(Mask.begin(), Mask.end());
Value *V1 = P1.dyn_cast<Value *>(), *V2 = P2.dyn_cast<Value *>();
unsigned CommonVF = Mask.size();
InstructionCost ExtraCost = 0;
auto GetNodeMinBWAffectedCost = [&](const TreeEntry &E,
unsigned VF) -> InstructionCost {
if (E.isGather() && allConstant(E.Scalars))
return TTI::TCC_Free;
Type *EScalarTy = E.Scalars.front()->getType();
bool IsSigned = true;
if (auto It = R.MinBWs.find(&E); It != R.MinBWs.end()) {
EScalarTy = IntegerType::get(EScalarTy->getContext(), It->second.first);
IsSigned = It->second.second;
}
if (EScalarTy != ScalarTy) {
unsigned CastOpcode = Instruction::Trunc;
unsigned DstSz = R.DL->getTypeSizeInBits(ScalarTy);
unsigned SrcSz = R.DL->getTypeSizeInBits(EScalarTy);
if (DstSz > SrcSz)
CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt;
return TTI.getCastInstrCost(CastOpcode, getWidenedType(ScalarTy, VF),
getWidenedType(EScalarTy, VF),
TTI::CastContextHint::None, CostKind);
}
return TTI::TCC_Free;
};
auto GetValueMinBWAffectedCost = [&](const Value *V) -> InstructionCost {
if (isa<Constant>(V))
return TTI::TCC_Free;
auto *VecTy = cast<VectorType>(V->getType());
Type *EScalarTy = VecTy->getElementType();
if (EScalarTy != ScalarTy) {
bool IsSigned = !isKnownNonNegative(V, SimplifyQuery(*R.DL));
unsigned CastOpcode = Instruction::Trunc;
unsigned DstSz = R.DL->getTypeSizeInBits(ScalarTy);
unsigned SrcSz = R.DL->getTypeSizeInBits(EScalarTy);
if (DstSz > SrcSz)
CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt;
return TTI.getCastInstrCost(
CastOpcode, VectorType::get(ScalarTy, VecTy->getElementCount()),
VecTy, TTI::CastContextHint::None, CostKind);
}
return TTI::TCC_Free;
};
if (!V1 && !V2 && !P2.isNull()) {
// Shuffle 2 entry nodes.
const TreeEntry *E = P1.get<const TreeEntry *>();
unsigned VF = E->getVectorFactor();
const TreeEntry *E2 = P2.get<const TreeEntry *>();
CommonVF = std::max(VF, E2->getVectorFactor());
assert(all_of(Mask,
[=](int Idx) {
return Idx < 2 * static_cast<int>(CommonVF);
}) &&
"All elements in mask must be less than 2 * CommonVF.");
if (E->Scalars.size() == E2->Scalars.size()) {
SmallVector<int> EMask = E->getCommonMask();
SmallVector<int> E2Mask = E2->getCommonMask();
if (!EMask.empty() || !E2Mask.empty()) {
for (int &Idx : CommonMask) {
if (Idx == PoisonMaskElem)
continue;
if (Idx < static_cast<int>(CommonVF) && !EMask.empty())
Idx = EMask[Idx];
else if (Idx >= static_cast<int>(CommonVF))
Idx = (E2Mask.empty() ? Idx - CommonVF : E2Mask[Idx - CommonVF]) +
E->Scalars.size();
}
}
CommonVF = E->Scalars.size();
ExtraCost += GetNodeMinBWAffectedCost(*E, CommonVF) +
GetNodeMinBWAffectedCost(*E2, CommonVF);
} else {
ExtraCost += GetNodeMinBWAffectedCost(*E, E->getVectorFactor()) +
GetNodeMinBWAffectedCost(*E2, E2->getVectorFactor());
}
V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
} else if (!V1 && P2.isNull()) {
// Shuffle single entry node.
const TreeEntry *E = P1.get<const TreeEntry *>();
unsigned VF = E->getVectorFactor();
CommonVF = VF;
assert(
all_of(Mask,
[=](int Idx) { return Idx < static_cast<int>(CommonVF); }) &&
"All elements in mask must be less than CommonVF.");
if (E->Scalars.size() == Mask.size() && VF != Mask.size()) {
SmallVector<int> EMask = E->getCommonMask();
assert(!EMask.empty() && "Expected non-empty common mask.");
for (int &Idx : CommonMask) {
if (Idx != PoisonMaskElem)
Idx = EMask[Idx];
}
CommonVF = E->Scalars.size();
}
ExtraCost += GetNodeMinBWAffectedCost(*E, CommonVF);
V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
// Not identity/broadcast? Try to see if the original vector is better.
if (!E->ReorderIndices.empty() && CommonVF == E->ReorderIndices.size() &&
CommonVF == CommonMask.size() &&
any_of(enumerate(CommonMask),
[](const auto &&P) {
return P.value() != PoisonMaskElem &&
static_cast<unsigned>(P.value()) != P.index();
}) &&
any_of(CommonMask,
[](int Idx) { return Idx != PoisonMaskElem && Idx != 0; })) {
SmallVector<int> ReorderMask;
inversePermutation(E->ReorderIndices, ReorderMask);
::addMask(CommonMask, ReorderMask);
}
} else if (V1 && P2.isNull()) {
// Shuffle single vector.
ExtraCost += GetValueMinBWAffectedCost(V1);
CommonVF = cast<FixedVectorType>(V1->getType())->getNumElements();
assert(
all_of(Mask,
[=](int Idx) { return Idx < static_cast<int>(CommonVF); }) &&
"All elements in mask must be less than CommonVF.");
} else if (V1 && !V2) {
// Shuffle vector and tree node.
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
const TreeEntry *E2 = P2.get<const TreeEntry *>();
CommonVF = std::max(VF, E2->getVectorFactor());
assert(all_of(Mask,
[=](int Idx) {
return Idx < 2 * static_cast<int>(CommonVF);
}) &&
"All elements in mask must be less than 2 * CommonVF.");
if (E2->Scalars.size() == VF && VF != CommonVF) {
SmallVector<int> E2Mask = E2->getCommonMask();
assert(!E2Mask.empty() && "Expected non-empty common mask.");
for (int &Idx : CommonMask) {
if (Idx == PoisonMaskElem)
continue;
if (Idx >= static_cast<int>(CommonVF))
Idx = E2Mask[Idx - CommonVF] + VF;
}
CommonVF = VF;
}
ExtraCost += GetValueMinBWAffectedCost(V1);
V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
ExtraCost += GetNodeMinBWAffectedCost(
*E2, std::min(CommonVF, E2->getVectorFactor()));
V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
} else if (!V1 && V2) {
// Shuffle vector and tree node.
unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
const TreeEntry *E1 = P1.get<const TreeEntry *>();
CommonVF = std::max(VF, E1->getVectorFactor());
assert(all_of(Mask,
[=](int Idx) {
return Idx < 2 * static_cast<int>(CommonVF);
}) &&
"All elements in mask must be less than 2 * CommonVF.");
if (E1->Scalars.size() == VF && VF != CommonVF) {
SmallVector<int> E1Mask = E1->getCommonMask();
assert(!E1Mask.empty() && "Expected non-empty common mask.");
for (int &Idx : CommonMask) {
if (Idx == PoisonMaskElem)
continue;
if (Idx >= static_cast<int>(CommonVF))
Idx = E1Mask[Idx - CommonVF] + VF;
else
Idx = E1Mask[Idx];
}
CommonVF = VF;
}
ExtraCost += GetNodeMinBWAffectedCost(
*E1, std::min(CommonVF, E1->getVectorFactor()));
V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
ExtraCost += GetValueMinBWAffectedCost(V2);
V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
} else {
assert(V1 && V2 && "Expected both vectors.");
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
CommonVF =
std::max(VF, cast<FixedVectorType>(V2->getType())->getNumElements());
assert(all_of(Mask,
[=](int Idx) {
return Idx < 2 * static_cast<int>(CommonVF);
}) &&
"All elements in mask must be less than 2 * CommonVF.");
ExtraCost +=
GetValueMinBWAffectedCost(V1) + GetValueMinBWAffectedCost(V2);
if (V1->getType() != V2->getType()) {
V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
} else {
if (cast<VectorType>(V1->getType())->getElementType() != ScalarTy)
V1 = Constant::getNullValue(getWidenedType(ScalarTy, CommonVF));
if (cast<VectorType>(V2->getType())->getElementType() != ScalarTy)
V2 = getAllOnesValue(*R.DL, getWidenedType(ScalarTy, CommonVF));
}
}
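// Fold the two inputs into a single shuffled vector: keep one placeholder
// input of the common mask width and drop the second input, if any.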
InVectors.front() =
Constant::getNullValue(getWidenedType(ScalarTy, CommonMask.size()));
if (InVectors.size() == 2)
InVectors.pop_back();
return ExtraCost + BaseShuffleAnalysis::createShuffle<InstructionCost>(
V1, V2, CommonMask, Builder);
}
public:
ShuffleCostEstimator(Type *ScalarTy, TargetTransformInfo &TTI,
ArrayRef<Value *> VectorizedVals, BoUpSLP &R,
SmallPtrSetImpl<Value *> &CheckedExtracts)
: ScalarTy(ScalarTy), TTI(TTI),
VectorizedVals(VectorizedVals.begin(), VectorizedVals.end()), R(R),
CheckedExtracts(CheckedExtracts) {}
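/// Adjusts the cost for a gather of extractelements: takes credit for
/// extracts that become dead after vectorization and returns the vector base
/// to be used as the shuffle input (a placeholder value if several bases must
/// feed the final shuffle).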
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
unsigned NumParts, bool &UseVecBaseAsInput) {
UseVecBaseAsInput = false;
if (Mask.empty())
return nullptr;
Value *VecBase = nullptr;
ArrayRef<Value *> VL = E->Scalars;
// If the resulting type is scalarized, do not adjust the cost.
if (NumParts == VL.size())
return nullptr;
// Check if the extracts can be considered reused if the same
// extractelements were already vectorized.
bool PrevNodeFound = any_of(
ArrayRef(R.VectorizableTree).take_front(E->Idx),
[&](const std::unique_ptr<TreeEntry> &TE) {
return ((!TE->isAltShuffle() &&
TE->getOpcode() == Instruction::ExtractElement) ||
TE->isGather()) &&
all_of(enumerate(TE->Scalars), [&](auto &&Data) {
return VL.size() > Data.index() &&
(Mask[Data.index()] == PoisonMaskElem ||
isa<UndefValue>(VL[Data.index()]) ||
Data.value() == VL[Data.index()]);
});
});
SmallPtrSet<Value *, 4> UniqueBases;
unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
for (unsigned Part : seq<unsigned>(NumParts)) {
unsigned Limit = getNumElems(VL.size(), SliceSize, Part);
ArrayRef<int> SubMask = Mask.slice(Part * SliceSize, Limit);
for (auto [I, V] : enumerate(VL.slice(Part * SliceSize, Limit))) {
// Ignore non-extractelement scalars.
if (isa<UndefValue>(V) ||
(!SubMask.empty() && SubMask[I] == PoisonMaskElem))
continue;
// If all users of the instruction are going to be vectorized and this
// instruction itself is not going to be vectorized, consider this
// instruction dead and remove its cost from the final cost of the
// vectorized tree.
// Also, avoid adjusting the cost for extractelements with multiple uses
// in different graph entries.
auto *EE = cast<ExtractElementInst>(V);
VecBase = EE->getVectorOperand();
UniqueBases.insert(VecBase);
const TreeEntry *VE = R.getTreeEntry(V);
if (!CheckedExtracts.insert(V).second ||
!R.areAllUsersVectorized(cast<Instruction>(V), &VectorizedVals) ||
any_of(EE->users(),
[&](User *U) {
return isa<GetElementPtrInst>(U) &&
!R.areAllUsersVectorized(cast<Instruction>(U),
&VectorizedVals);
}) ||
(VE && VE != E))
continue;
std::optional<unsigned> EEIdx = getExtractIndex(EE);
if (!EEIdx)
continue;
unsigned Idx = *EEIdx;
// Take credit for instruction that will become dead.
if (EE->hasOneUse() || !PrevNodeFound) {
Instruction *Ext = EE->user_back();
if (isa<SExtInst, ZExtInst>(Ext) &&
all_of(Ext->users(), IsaPred<GetElementPtrInst>)) {
// Use getExtractWithExtendCost() to calculate the cost of
// extractelement/ext pair.
Cost -=
TTI.getExtractWithExtendCost(Ext->getOpcode(), Ext->getType(),
EE->getVectorOperandType(), Idx);
// Add back the cost of s|zext which is subtracted separately.
Cost += TTI.getCastInstrCost(
Ext->getOpcode(), Ext->getType(), EE->getType(),
TTI::getCastContextHint(Ext), CostKind, Ext);
continue;
}
}
Cost -= TTI.getVectorInstrCost(*EE, EE->getVectorOperandType(),
CostKind, Idx);
}
}
// Check that the gather of extractelements can be represented as just a
// shuffle of one or two vectors the scalars are extracted from, i.e. the
// bunch of extractelement instructions that must be gathered into a vector
// can be represented as a permutation of elements from one or two input
// vectors.
// Skipped if the same extractelements were already vectorized.
if (!PrevNodeFound)
Cost += computeExtractCost(VL, Mask, ShuffleKinds, NumParts);
InVectors.assign(1, E);
CommonMask.assign(Mask.begin(), Mask.end());
transformMaskAfterShuffle(CommonMask, CommonMask);
SameNodesEstimated = false;
if (NumParts != 1 && UniqueBases.size() != 1) {
UseVecBaseAsInput = true;
VecBase =
Constant::getNullValue(getWidenedType(ScalarTy, CommonMask.size()));
}
return VecBase;
}
/// Checks if the specified entry \p E needs to be delayed because of its
/// dependency nodes.
std::optional<InstructionCost>
needToDelay(const TreeEntry *,
ArrayRef<SmallVector<const TreeEntry *>>) const {
// No need to delay the cost estimation during analysis.
return std::nullopt;
}
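/// Adds 2 tree entries and the mask for their shuffling.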
void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef<int> Mask) {
if (&E1 == &E2) {
assert(all_of(Mask,
[&](int Idx) {
return Idx < static_cast<int>(E1.getVectorFactor());
}) &&
"Expected single vector shuffle mask.");
add(E1, Mask);
return;
}
if (InVectors.empty()) {
CommonMask.assign(Mask.begin(), Mask.end());
InVectors.assign({&E1, &E2});
return;
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
if (NumParts == 0 || NumParts >= Mask.size())
NumParts = 1;
unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
const auto *It =
find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; });
unsigned Part = std::distance(Mask.begin(), It) / SliceSize;
estimateNodesPermuteCost(E1, &E2, Mask, Part, SliceSize);
}
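/// Adds a single tree entry and the mask for its shuffling.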
void add(const TreeEntry &E1, ArrayRef<int> Mask) {
if (InVectors.empty()) {
CommonMask.assign(Mask.begin(), Mask.end());
InVectors.assign(1, &E1);
return;
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size());
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
if (NumParts == 0 || NumParts >= Mask.size())
NumParts = 1;
unsigned SliceSize = getPartNumElems(Mask.size(), NumParts);
const auto *It =
find_if(Mask, [](int Idx) { return Idx != PoisonMaskElem; });
unsigned Part = std::distance(Mask.begin(), It) / SliceSize;
estimateNodesPermuteCost(E1, nullptr, Mask, Part, SliceSize);
if (!SameNodesEstimated && InVectors.size() == 1)
InVectors.emplace_back(&E1);
}
/// Adds 2 input vectors and the mask for their shuffling.
void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
// This is reached only for shuffling of 2 vectors with extractelements,
// already handled in adjustExtracts.
assert(InVectors.size() == 1 &&
all_of(enumerate(CommonMask),
[&](auto P) {
if (P.value() == PoisonMaskElem)
return Mask[P.index()] == PoisonMaskElem;
auto *EI =
cast<ExtractElementInst>(InVectors.front()
.get<const TreeEntry *>()
->Scalars[P.index()]);
return EI->getVectorOperand() == V1 ||
EI->getVectorOperand() == V2;
}) &&
"Expected extractelement vectors.");
}
/// Adds one more input vector and the mask for the shuffling.
void add(Value *V1, ArrayRef<int> Mask, bool ForExtracts = false) {
if (InVectors.empty()) {
assert(CommonMask.empty() && !ForExtracts &&
"Expected empty input mask/vectors.");
CommonMask.assign(Mask.begin(), Mask.end());
InVectors.assign(1, V1);
return;
}
if (ForExtracts) {
// No need to add vectors here; they were already handled in adjustExtracts.
assert(InVectors.size() == 1 &&
InVectors.front().is<const TreeEntry *>() && !CommonMask.empty() &&
all_of(enumerate(CommonMask),
[&](auto P) {
Value *Scalar = InVectors.front()
.get<const TreeEntry *>()
->Scalars[P.index()];
if (P.value() == PoisonMaskElem)
return P.value() == Mask[P.index()] ||
isa<UndefValue>(Scalar);
if (isa<Constant>(V1))
return true;
auto *EI = cast<ExtractElementInst>(Scalar);
return EI->getVectorOperand() == V1;
}) &&
"Expected only tree entry for extractelement vectors.");
return;
}
assert(!InVectors.empty() && !CommonMask.empty() &&
"Expected only tree entries from extracts/reused buildvectors.");
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
if (InVectors.size() == 2) {
Cost += createShuffle(InVectors.front(), InVectors.back(), CommonMask);
transformMaskAfterShuffle(CommonMask, CommonMask);
VF = std::max<unsigned>(VF, CommonMask.size());
} else if (const auto *InTE =
InVectors.front().dyn_cast<const TreeEntry *>()) {
VF = std::max(VF, InTE->getVectorFactor());
} else {
VF = std::max(
VF, cast<FixedVectorType>(InVectors.front().get<Value *>()->getType())
->getNumElements());
}
InVectors.push_back(V1);
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
CommonMask[Idx] = Mask[Idx] + VF;
}
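/// Accounts for the cost of building a vector from the scalars \p VL and
/// returns a placeholder constant vector standing in for the gathered value.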
Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
Value *Root = nullptr) {
Cost += getBuildVectorCost(VL, Root);
if (!Root) {
// FIXME: Need to find a way to avoid use of getNullValue here.
SmallVector<Constant *> Vals;
unsigned VF = VL.size();
if (MaskVF != 0)
VF = std::min(VF, MaskVF);
for (Value *V : VL.take_front(VF)) {
if (isa<UndefValue>(V)) {
Vals.push_back(cast<Constant>(V));
continue;
}
Vals.push_back(Constant::getNullValue(V->getType()));
}
return ConstantVector::get(Vals);
}
return ConstantVector::getSplat(
ElementCount::getFixed(
cast<FixedVectorType>(Root->getType())->getNumElements()),
getAllOnesValue(*R.DL, ScalarTy));
}
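/// Freeze is considered free in the cost model, so the cost is returned
/// unchanged.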
InstructionCost createFreeze(InstructionCost Cost) { return Cost; }
/// Finalize emission of the shuffles.
InstructionCost
finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
IsFinalized = true;
if (Action) {
const PointerUnion<Value *, const TreeEntry *> &Vec = InVectors.front();
if (InVectors.size() == 2)
Cost += createShuffle(Vec, InVectors.back(), CommonMask);
else
Cost += createShuffle(Vec, nullptr, CommonMask);
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (CommonMask[Idx] != PoisonMaskElem)
CommonMask[Idx] = Idx;
assert(VF > 0 &&
"Expected vector length for the final value before action.");
Value *V = Vec.get<Value *>();
Action(V, CommonMask);
InVectors.front() = V;
}
::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true);
if (CommonMask.empty()) {
assert(InVectors.size() == 1 && "Expected only one vector with no mask");
return Cost;
}
return Cost +
createShuffle(InVectors.front(),
InVectors.size() == 2 ? InVectors.back() : nullptr,
CommonMask);
}
~ShuffleCostEstimator() {
assert((IsFinalized || CommonMask.empty()) &&
"Shuffle construction must be finalized.");
}
};
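/// Returns the tree entry that represents operand \p Idx of the entry \p E,
/// looking through vectorized entries, multi-node scalars and gather nodes.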
const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E,
unsigned Idx) const {
Value *Op = E->getOperand(Idx).front();
if (const TreeEntry *TE = getTreeEntry(Op)) {
if (find_if(TE->UserTreeIndices, [&](const EdgeInfo &EI) {
return EI.EdgeIdx == Idx && EI.UserTE == E;
}) != TE->UserTreeIndices.end())
return TE;
auto MIt = MultiNodeScalars.find(Op);
if (MIt != MultiNodeScalars.end()) {
for (const TreeEntry *TE : MIt->second) {
if (find_if(TE->UserTreeIndices, [&](const EdgeInfo &EI) {
return EI.EdgeIdx == Idx && EI.UserTE == E;
}) != TE->UserTreeIndices.end())
return TE;
}
}
}
const auto *It =
find_if(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
return TE->isGather() &&
find_if(TE->UserTreeIndices, [&](const EdgeInfo &EI) {
return EI.EdgeIdx == Idx && EI.UserTE == E;
}) != TE->UserTreeIndices.end();
});
assert(It != VectorizableTree.end() && "Expected vectorizable entry.");
return It->get();
}
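/// Computes the cast context hint for the tree entry \p TE: GatherScatter for
/// scattered/strided nodes, Normal or Reversed for vectorized loads depending
/// on the reorder mask, None otherwise.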
TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
if (TE.State == TreeEntry::ScatterVectorize ||
TE.State == TreeEntry::StridedVectorize)
return TTI::CastContextHint::GatherScatter;
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::Load &&
!TE.isAltShuffle()) {
if (TE.ReorderIndices.empty())
return TTI::CastContextHint::Normal;
SmallVector<int> Mask;
inversePermutation(TE.ReorderIndices, Mask);
if (ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
return TTI::CastContextHint::Reversed;
}
return TTI::CastContextHint::None;
}
/// Builds the vector of argument types for the given call instruction with
/// the given \p ID for the specified vector factor.
static SmallVector<Type *> buildIntrinsicArgTypes(const CallInst *CI,
const Intrinsic::ID ID,
const unsigned VF,
unsigned MinBW) {
SmallVector<Type *> ArgTys;
for (auto [Idx, Arg] : enumerate(CI->args())) {
if (ID != Intrinsic::not_intrinsic) {
if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx)) {
ArgTys.push_back(Arg->getType());
continue;
}
if (MinBW > 0) {
ArgTys.push_back(
getWidenedType(IntegerType::get(CI->getContext(), MinBW), VF));
continue;
}
}
ArgTys.push_back(getWidenedType(Arg->getType(), VF));
}
return ArgTys;
}
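/// Computes the cost of vectorizing the tree entry \p E: for gather nodes,
/// the cost of building the vector; otherwise the vector instruction cost
/// (plus any required shuffles) minus the cost of the replaced scalar
/// instructions.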
InstructionCost
BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
SmallPtrSetImpl<Value *> &CheckedExtracts) {
ArrayRef<Value *> VL = E->Scalars;
Type *ScalarTy = VL[0]->getType();
if (!E->isGather()) {
if (auto *SI = dyn_cast<StoreInst>(VL[0]))
ScalarTy = SI->getValueOperand()->getType();
else if (auto *CI = dyn_cast<CmpInst>(VL[0]))
ScalarTy = CI->getOperand(0)->getType();
else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
ScalarTy = IE->getOperand(1)->getType();
}
if (!isValidElementType(ScalarTy))
return InstructionCost::getInvalid();
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
// If we have computed a smaller type for the expression, update VecTy so
// that the costs will be accurate.
auto It = MinBWs.find(E);
Type *OrigScalarTy = ScalarTy;
if (It != MinBWs.end())
ScalarTy = IntegerType::get(F->getContext(), It->second.first);
auto *VecTy = getWidenedType(ScalarTy, VL.size());
unsigned EntryVF = E->getVectorFactor();
auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF);
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
if (E->isGather()) {
if (allConstant(VL))
return 0;
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
return processBuildVector<ShuffleCostEstimator, InstructionCost>(
E, ScalarTy, *TTI, VectorizedVals, *this, CheckedExtracts);
}
InstructionCost CommonCost = 0;
SmallVector<int> Mask;
bool IsReverseOrder = isReverseOrder(E->ReorderIndices);
if (!E->ReorderIndices.empty() &&
(E->State != TreeEntry::StridedVectorize || !IsReverseOrder)) {
SmallVector<int> NewMask;
if (E->getOpcode() == Instruction::Store) {
// For stores the order is actually a mask.
NewMask.resize(E->ReorderIndices.size());
copy(E->ReorderIndices, NewMask.begin());
} else {
inversePermutation(E->ReorderIndices, NewMask);
}
::addMask(Mask, NewMask);
}
if (NeedToShuffleReuses)
::addMask(Mask, E->ReuseShuffleIndices);
if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
CommonCost =
TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
assert((E->State == TreeEntry::Vectorize ||
E->State == TreeEntry::ScatterVectorize ||
E->State == TreeEntry::StridedVectorize) &&
"Unhandled state");
assert(E->getOpcode() &&
((allSameType(VL) && allSameBlock(VL)) ||
(E->getOpcode() == Instruction::GetElementPtr &&
E->getMainOp()->getType()->isPointerTy())) &&
"Invalid VL");
Instruction *VL0 = E->getMainOp();
unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
SetVector<Value *> UniqueValues(VL.begin(), VL.end());
const unsigned Sz = UniqueValues.size();
SmallBitVector UsedScalars(Sz, false);
for (unsigned I = 0; I < Sz; ++I) {
if (getTreeEntry(UniqueValues[I]) == E)
continue;
UsedScalars.set(I);
}
auto GetCastContextHint = [&](Value *V) {
if (const TreeEntry *OpTE = getTreeEntry(V))
return getCastContextHint(*OpTE);
InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI);
if (SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle())
return TTI::CastContextHint::GatherScatter;
return TTI::CastContextHint::None;
};
auto GetCostDiff =
[=](function_ref<InstructionCost(unsigned)> ScalarEltCost,
function_ref<InstructionCost(InstructionCost)> VectorCost) {
// Calculate the cost of this instruction.
InstructionCost ScalarCost = 0;
if (isa<CastInst, CallInst>(VL0)) {
// For some instructions there is no need to calculate the cost for each
// particular one; we can use the cost of a single instruction multiplied
// by the total number of scalar instructions.
ScalarCost = (Sz - UsedScalars.count()) * ScalarEltCost(0);
} else {
for (unsigned I = 0; I < Sz; ++I) {
if (UsedScalars.test(I))
continue;
ScalarCost += ScalarEltCost(I);
}
}
InstructionCost VecCost = VectorCost(CommonCost);
// Check if the current node must be resized, if the parent node is not
// resized.
if (!UnaryInstruction::isCast(E->getOpcode()) && E->Idx != 0) {
const EdgeInfo &EI = E->UserTreeIndices.front();
if ((EI.UserTE->getOpcode() != Instruction::Select ||
EI.EdgeIdx != 0) &&
It != MinBWs.end()) {
auto UserBWIt = MinBWs.find(EI.UserTE);
Type *UserScalarTy =
EI.UserTE->getOperand(EI.EdgeIdx).front()->getType();
if (UserBWIt != MinBWs.end())
UserScalarTy = IntegerType::get(ScalarTy->getContext(),
UserBWIt->second.first);
if (ScalarTy != UserScalarTy) {
unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
unsigned SrcBWSz = DL->getTypeSizeInBits(UserScalarTy);
unsigned VecOpcode;
auto *UserVecTy =
getWidenedType(UserScalarTy, E->getVectorFactor());
if (BWSz > SrcBWSz)
VecOpcode = Instruction::Trunc;
else
VecOpcode =
It->second.second ? Instruction::SExt : Instruction::ZExt;
TTI::CastContextHint CCH = GetCastContextHint(VL0);
VecCost += TTI->getCastInstrCost(VecOpcode, UserVecTy, VecTy, CCH,
CostKind);
}
}
}
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost - CommonCost,
ScalarCost, "Calculated costs for Tree"));
return VecCost - ScalarCost;
};
// Calculate the cost difference from vectorizing a set of GEPs.
// A negative value means vectorizing is profitable.
auto GetGEPCostDiff = [=](ArrayRef<Value *> Ptrs, Value *BasePtr) {
assert((E->State == TreeEntry::Vectorize ||
E->State == TreeEntry::StridedVectorize) &&
"Entry state expected to be Vectorize or StridedVectorize here.");
InstructionCost ScalarCost = 0;
InstructionCost VecCost = 0;
std::tie(ScalarCost, VecCost) = getGEPCosts(
*TTI, Ptrs, BasePtr, E->getOpcode(), CostKind, OrigScalarTy, VecTy);
LLVM_DEBUG(dumpTreeCosts(E, 0, VecCost, ScalarCost,
"Calculated GEPs cost for Tree"));
return VecCost - ScalarCost;
};
switch (ShuffleOrOp) {
case Instruction::PHI: {
// Count reused scalars.
InstructionCost ScalarCost = 0;
SmallPtrSet<const TreeEntry *, 4> CountedOps;
for (Value *V : UniqueValues) {
auto *PHI = dyn_cast<PHINode>(V);
if (!PHI)
continue;
ValueList Operands(PHI->getNumIncomingValues(), nullptr);
for (unsigned I = 0, N = PHI->getNumIncomingValues(); I < N; ++I) {
Value *Op = PHI->getIncomingValue(I);
Operands[I] = Op;
}
if (const TreeEntry *OpTE = getTreeEntry(Operands.front()))
if (OpTE->isSame(Operands) && CountedOps.insert(OpTE).second)
if (!OpTE->ReuseShuffleIndices.empty())
ScalarCost += TTI::TCC_Basic * (OpTE->ReuseShuffleIndices.size() -
OpTE->Scalars.size());
}
return CommonCost - ScalarCost;
}
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
auto GetScalarCost = [&](unsigned Idx) {
auto *I = cast<Instruction>(UniqueValues[Idx]);
VectorType *SrcVecTy;
if (ShuffleOrOp == Instruction::ExtractElement) {
auto *EE = cast<ExtractElementInst>(I);
SrcVecTy = EE->getVectorOperandType();
} else {
auto *EV = cast<ExtractValueInst>(I);
Type *AggregateTy = EV->getAggregateOperand()->getType();
unsigned NumElts;
if (auto *ATy = dyn_cast<ArrayType>(AggregateTy))
NumElts = ATy->getNumElements();
else
NumElts = AggregateTy->getStructNumElements();
SrcVecTy = getWidenedType(OrigScalarTy, NumElts);
}
if (I->hasOneUse()) {
Instruction *Ext = I->user_back();
if ((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
all_of(Ext->users(), IsaPred<GetElementPtrInst>)) {
// Use getExtractWithExtendCost() to calculate the cost of
// extractelement/ext pair.
InstructionCost Cost = TTI->getExtractWithExtendCost(
Ext->getOpcode(), Ext->getType(), SrcVecTy, *getExtractIndex(I));
// Subtract the cost of s|zext which is subtracted separately.
Cost -= TTI->getCastInstrCost(
Ext->getOpcode(), Ext->getType(), I->getType(),
TTI::getCastContextHint(Ext), CostKind, Ext);
return Cost;
}
}
return TTI->getVectorInstrCost(Instruction::ExtractElement, SrcVecTy,
CostKind, *getExtractIndex(I));
};
auto GetVectorCost = [](InstructionCost CommonCost) { return CommonCost; };
return GetCostDiff(GetScalarCost, GetVectorCost);
}
case Instruction::InsertElement: {
assert(E->ReuseShuffleIndices.empty() &&
"Unique insertelements only are expected.");
auto *SrcVecTy = cast<FixedVectorType>(VL0->getType());
unsigned const NumElts = SrcVecTy->getNumElements();
unsigned const NumScalars = VL.size();
unsigned NumOfParts = TTI->getNumberOfParts(SrcVecTy);
SmallVector<int> InsertMask(NumElts, PoisonMaskElem);
unsigned OffsetBeg = *getElementIndex(VL.front());
unsigned OffsetEnd = OffsetBeg;
InsertMask[OffsetBeg] = 0;
for (auto [I, V] : enumerate(VL.drop_front())) {
unsigned Idx = *getElementIndex(V);
if (OffsetBeg > Idx)
OffsetBeg = Idx;
else if (OffsetEnd < Idx)
OffsetEnd = Idx;
InsertMask[Idx] = I + 1;
}
unsigned VecScalarsSz = PowerOf2Ceil(NumElts);
if (NumOfParts > 0)
VecScalarsSz = PowerOf2Ceil((NumElts + NumOfParts - 1) / NumOfParts);
unsigned VecSz = (1 + OffsetEnd / VecScalarsSz - OffsetBeg / VecScalarsSz) *
VecScalarsSz;
unsigned Offset = VecScalarsSz * (OffsetBeg / VecScalarsSz);
unsigned InsertVecSz = std::min<unsigned>(
PowerOf2Ceil(OffsetEnd - OffsetBeg + 1),
((OffsetEnd - OffsetBeg + VecScalarsSz) / VecScalarsSz) * VecScalarsSz);
bool IsWholeSubvector =
OffsetBeg == Offset && ((OffsetEnd + 1) % VecScalarsSz == 0);
// Check if we can safely insert a subvector. If it is not possible, just
// generate a whole-sized vector and shuffle the source vector and the new
// subvector.
if (OffsetBeg + InsertVecSz > VecSz) {
// Align OffsetBeg to generate correct mask.
OffsetBeg = alignDown(OffsetBeg, VecSz, Offset);
InsertVecSz = VecSz;
}
APInt DemandedElts = APInt::getZero(NumElts);
// TODO: Add support for Instruction::InsertValue.
SmallVector<int> Mask;
if (!E->ReorderIndices.empty()) {
inversePermutation(E->ReorderIndices, Mask);
Mask.append(InsertVecSz - Mask.size(), PoisonMaskElem);
} else {
Mask.assign(VecSz, PoisonMaskElem);
std::iota(Mask.begin(), std::next(Mask.begin(), InsertVecSz), 0);
}
bool IsIdentity = true;
SmallVector<int> PrevMask(InsertVecSz, PoisonMaskElem);
Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
unsigned InsertIdx = *getElementIndex(VL[PrevMask[I]]);
DemandedElts.setBit(InsertIdx);
IsIdentity &= InsertIdx - OffsetBeg == I;
Mask[InsertIdx - OffsetBeg] = I;
}
assert(Offset < NumElts && "Failed to find vector index offset");
InstructionCost Cost = 0;
Cost -= TTI->getScalarizationOverhead(SrcVecTy, DemandedElts,
/*Insert*/ true, /*Extract*/ false,
CostKind);
// First cost - resize to the actual vector size if it is not an identity
// shuffle or we need to shift the vector.
// Do not calculate the cost if the actual size is the register size and
// we can merge this shuffle with the following SK_Select.
auto *InsertVecTy = getWidenedType(ScalarTy, InsertVecSz);
if (!IsIdentity)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
InsertVecTy, Mask);
auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
return !is_contained(E->Scalars, cast<Instruction>(V)->getOperand(0));
}));
// Second cost - permutation with subvector, if some elements are from the
// initial vector or inserting a subvector.
// TODO: Implement the analysis of the FirstInsert->getOperand(0)
// subvector of ActualVecTy.
SmallBitVector InMask =
isUndefVector(FirstInsert->getOperand(0),
buildUseMask(NumElts, InsertMask, UseMask::UndefsAsMask));
if (!InMask.all() && NumScalars != NumElts && !IsWholeSubvector) {
if (InsertVecSz != VecSz) {
auto *ActualVecTy = getWidenedType(ScalarTy, VecSz);
Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy,
std::nullopt, CostKind, OffsetBeg - Offset,
InsertVecTy);
} else {
for (unsigned I = 0, End = OffsetBeg - Offset; I < End; ++I)
Mask[I] = InMask.test(I) ? PoisonMaskElem : I;
for (unsigned I = OffsetBeg - Offset, End = OffsetEnd - Offset;
I <= End; ++I)
if (Mask[I] != PoisonMaskElem)
Mask[I] = I + VecSz;
for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I)
Mask[I] =
((I >= InMask.size()) || InMask.test(I)) ? PoisonMaskElem : I;
Cost +=
::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
}
}
return Cost;
}
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
auto SrcIt = MinBWs.find(getOperandEntry(E, 0));
Type *SrcScalarTy = VL0->getOperand(0)->getType();
auto *SrcVecTy = getWidenedType(SrcScalarTy, VL.size());
unsigned Opcode = ShuffleOrOp;
unsigned VecOpcode = Opcode;
if (!ScalarTy->isFloatingPointTy() && !SrcScalarTy->isFloatingPointTy() &&
(SrcIt != MinBWs.end() || It != MinBWs.end())) {
// Check if the values are candidates to demote.
unsigned SrcBWSz = DL->getTypeSizeInBits(SrcScalarTy);
if (SrcIt != MinBWs.end()) {
SrcBWSz = SrcIt->second.first;
SrcScalarTy = IntegerType::get(F->getContext(), SrcBWSz);
SrcVecTy = getWidenedType(SrcScalarTy, VL.size());
}
unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
if (BWSz == SrcBWSz) {
VecOpcode = Instruction::BitCast;
} else if (BWSz < SrcBWSz) {
VecOpcode = Instruction::Trunc;
} else if (It != MinBWs.end()) {
assert(BWSz > SrcBWSz && "Invalid cast!");
VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
} else if (SrcIt != MinBWs.end()) {
assert(BWSz > SrcBWSz && "Invalid cast!");
VecOpcode =
SrcIt->second.second ? Instruction::SExt : Instruction::ZExt;
}
} else if (VecOpcode == Instruction::SIToFP && SrcIt != MinBWs.end() &&
!SrcIt->second.second) {
VecOpcode = Instruction::UIToFP;
}
auto GetScalarCost = [&](unsigned Idx) -> InstructionCost {
auto *VI = cast<Instruction>(UniqueValues[Idx]);
return TTI->getCastInstrCost(Opcode, VL0->getType(),
VL0->getOperand(0)->getType(),
TTI::getCastContextHint(VI), CostKind, VI);
};
auto GetVectorCost = [=](InstructionCost CommonCost) {
// Do not count cost here if minimum bitwidth is in effect and it is just
// a bitcast (here it is just a noop).
if (VecOpcode != Opcode && VecOpcode == Instruction::BitCast)
return CommonCost;
auto *VI = VL0->getOpcode() == Opcode ? VL0 : nullptr;
TTI::CastContextHint CCH = GetCastContextHint(VL0->getOperand(0));
return CommonCost +
TTI->getCastInstrCost(VecOpcode, VecTy, SrcVecTy, CCH, CostKind,
VecOpcode == Opcode ? VI : nullptr);
};
return GetCostDiff(GetScalarCost, GetVectorCost);
}
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select: {
CmpInst::Predicate VecPred, SwappedVecPred;
auto MatchCmp = m_Cmp(VecPred, m_Value(), m_Value());
if (match(VL0, m_Select(MatchCmp, m_Value(), m_Value())) ||
match(VL0, MatchCmp))
SwappedVecPred = CmpInst::getSwappedPredicate(VecPred);
else
SwappedVecPred = VecPred = ScalarTy->isFloatingPointTy()
? CmpInst::BAD_FCMP_PREDICATE
: CmpInst::BAD_ICMP_PREDICATE;
auto GetScalarCost = [&](unsigned Idx) {
auto *VI = cast<Instruction>(UniqueValues[Idx]);
CmpInst::Predicate CurrentPred = ScalarTy->isFloatingPointTy()
? CmpInst::BAD_FCMP_PREDICATE
: CmpInst::BAD_ICMP_PREDICATE;
auto MatchCmp = m_Cmp(CurrentPred, m_Value(), m_Value());
if ((!match(VI, m_Select(MatchCmp, m_Value(), m_Value())) &&
!match(VI, MatchCmp)) ||
(CurrentPred != VecPred && CurrentPred != SwappedVecPred))
VecPred = SwappedVecPred = ScalarTy->isFloatingPointTy()
? CmpInst::BAD_FCMP_PREDICATE
: CmpInst::BAD_ICMP_PREDICATE;
InstructionCost ScalarCost = TTI->getCmpSelInstrCost(
E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred,
CostKind, VI);
auto [MinMaxID, SelectOnly] = canConvertToMinOrMaxIntrinsic(VI);
if (MinMaxID != Intrinsic::not_intrinsic) {
Type *CanonicalType = OrigScalarTy;
if (CanonicalType->isPtrOrPtrVectorTy())
CanonicalType = CanonicalType->getWithNewType(IntegerType::get(
CanonicalType->getContext(),
DL->getTypeSizeInBits(CanonicalType->getScalarType())));
IntrinsicCostAttributes CostAttrs(MinMaxID, CanonicalType,
{CanonicalType, CanonicalType});
InstructionCost IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
// If the selects are the only uses of the compares, they will be
// dead and we can adjust the cost by removing their cost.
if (SelectOnly) {
auto *CI = cast<CmpInst>(VI->getOperand(0));
IntrinsicCost -= TTI->getCmpSelInstrCost(
CI->getOpcode(), OrigScalarTy, Builder.getInt1Ty(),
CI->getPredicate(), CostKind, CI);
}
ScalarCost = std::min(ScalarCost, IntrinsicCost);
}
return ScalarCost;
};
auto GetVectorCost = [&](InstructionCost CommonCost) {
auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size());
InstructionCost VecCost = TTI->getCmpSelInstrCost(
E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0);
// Check if it is possible and profitable to use min/max for selects
// in VL.
auto [MinMaxID, SelectOnly] = canConvertToMinOrMaxIntrinsic(VL);
if (MinMaxID != Intrinsic::not_intrinsic) {
Type *CanonicalType = VecTy;
if (CanonicalType->isPtrOrPtrVectorTy())
CanonicalType = CanonicalType->getWithNewType(IntegerType::get(
CanonicalType->getContext(),
DL->getTypeSizeInBits(CanonicalType->getScalarType())));
IntrinsicCostAttributes CostAttrs(MinMaxID, CanonicalType,
{CanonicalType, CanonicalType});
InstructionCost IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
// If the selects are the only uses of the compares, they will be
// dead and we can adjust the cost by removing their cost.
if (SelectOnly) {
auto *CI =
cast<CmpInst>(cast<Instruction>(VL.front())->getOperand(0));
IntrinsicCost -= TTI->getCmpSelInstrCost(CI->getOpcode(), VecTy,
MaskTy, VecPred, CostKind);
}
VecCost = std::min(VecCost, IntrinsicCost);
}
return VecCost + CommonCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
}
case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
auto GetScalarCost = [&](unsigned Idx) {
auto *VI = cast<Instruction>(UniqueValues[Idx]);
unsigned OpIdx = isa<UnaryOperator>(VI) ? 0 : 1;
TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(VI->getOperand(0));
TTI::OperandValueInfo Op2Info =
TTI::getOperandInfo(VI->getOperand(OpIdx));
SmallVector<const Value *> Operands(VI->operand_values());
return TTI->getArithmeticInstrCost(ShuffleOrOp, OrigScalarTy, CostKind,
Op1Info, Op2Info, Operands, VI);
};
auto GetVectorCost = [=](InstructionCost CommonCost) {
if (ShuffleOrOp == Instruction::And && It != MinBWs.end()) {
for (unsigned I : seq<unsigned>(0, E->getNumOperands())) {
ArrayRef<Value *> Ops = E->getOperand(I);
if (all_of(Ops, [&](Value *Op) {
auto *CI = dyn_cast<ConstantInt>(Op);
return CI && CI->getValue().countr_one() >= It->second.first;
}))
return CommonCost;
}
}
unsigned OpIdx = isa<UnaryOperator>(VL0) ? 0 : 1;
TTI::OperandValueInfo Op1Info = getOperandInfo(E->getOperand(0));
TTI::OperandValueInfo Op2Info = getOperandInfo(E->getOperand(OpIdx));
return TTI->getArithmeticInstrCost(ShuffleOrOp, VecTy, CostKind, Op1Info,
Op2Info, std::nullopt, nullptr, TLI) +
CommonCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
}
case Instruction::GetElementPtr: {
return CommonCost + GetGEPCostDiff(VL, VL0);
}
case Instruction::Load: {
auto GetScalarCost = [&](unsigned Idx) {
auto *VI = cast<LoadInst>(UniqueValues[Idx]);
return TTI->getMemoryOpCost(Instruction::Load, OrigScalarTy,
VI->getAlign(), VI->getPointerAddressSpace(),
CostKind, TTI::OperandValueInfo(), VI);
};
auto *LI0 = cast<LoadInst>(VL0);
auto GetVectorCost = [&](InstructionCost CommonCost) {
InstructionCost VecLdCost;
if (E->State == TreeEntry::Vectorize) {
VecLdCost = TTI->getMemoryOpCost(
Instruction::Load, VecTy, LI0->getAlign(),
LI0->getPointerAddressSpace(), CostKind, TTI::OperandValueInfo());
} else if (E->State == TreeEntry::StridedVectorize) {
Align CommonAlignment =
computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
VecLdCost = TTI->getStridedMemoryOpCost(
Instruction::Load, VecTy, LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind);
} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");
Align CommonAlignment =
computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
VecLdCost = TTI->getGatherScatterOpCost(
Instruction::Load, VecTy, LI0->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind);
}
return VecLdCost + CommonCost;
};
InstructionCost Cost = GetCostDiff(GetScalarCost, GetVectorCost);
// If this node generates a masked gather load then it is not a terminal
// node; hence the address operand cost is estimated separately.
if (E->State == TreeEntry::ScatterVectorize)
return Cost;
// Estimate the cost of the GEPs since this tree node is a terminal node.
SmallVector<Value *> PointerOps(VL.size());
for (auto [I, V] : enumerate(VL))
PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();
return Cost + GetGEPCostDiff(PointerOps, LI0->getPointerOperand());
}
case Instruction::Store: {
bool IsReorder = !E->ReorderIndices.empty();
auto GetScalarCost = [=](unsigned Idx) {
auto *VI = cast<StoreInst>(VL[Idx]);
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(VI->getValueOperand());
return TTI->getMemoryOpCost(Instruction::Store, OrigScalarTy,
VI->getAlign(), VI->getPointerAddressSpace(),
CostKind, OpInfo, VI);
};
auto *BaseSI =
cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
auto GetVectorCost = [=](InstructionCost CommonCost) {
// We know that we can merge the stores. Calculate the cost.
InstructionCost VecStCost;
if (E->State == TreeEntry::StridedVectorize) {
Align CommonAlignment =
computeCommonAlignment<StoreInst>(UniqueValues.getArrayRef());
VecStCost = TTI->getStridedMemoryOpCost(
Instruction::Store, VecTy, BaseSI->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind);
} else {
assert(E->State == TreeEntry::Vectorize &&
"Expected either strided or consecutive stores.");
TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
VecStCost = TTI->getMemoryOpCost(
Instruction::Store, VecTy, BaseSI->getAlign(),
BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
}
return VecStCost + CommonCost;
};
SmallVector<Value *> PointerOps(VL.size());
for (auto [I, V] : enumerate(VL)) {
unsigned Idx = IsReorder ? E->ReorderIndices[I] : I;
PointerOps[Idx] = cast<StoreInst>(V)->getPointerOperand();
}
return GetCostDiff(GetScalarCost, GetVectorCost) +
GetGEPCostDiff(PointerOps, BaseSI->getPointerOperand());
}
case Instruction::Call: {
auto GetScalarCost = [&](unsigned Idx) {
auto *CI = cast<CallInst>(UniqueValues[Idx]);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (ID != Intrinsic::not_intrinsic) {
IntrinsicCostAttributes CostAttrs(ID, *CI, 1);
return TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
}
return TTI->getCallInstrCost(CI->getCalledFunction(),
CI->getFunctionType()->getReturnType(),
CI->getFunctionType()->params(), CostKind);
};
auto GetVectorCost = [=](InstructionCost CommonCost) {
auto *CI = cast<CallInst>(VL0);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
SmallVector<Type *> ArgTys =
buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
It != MinBWs.end() ? It->second.first : 0);
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
return std::min(VecCallCosts.first, VecCallCosts.second) + CommonCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
}
case Instruction::ShuffleVector: {
assert(E->isAltShuffle() &&
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
Instruction::isCast(E->getAltOpcode())) ||
(isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
"Invalid Shuffle Vector Operand");
// Try to find the previous shuffle node with the same operands and same
// main/alternate ops.
auto TryFindNodeWithEqualOperands = [=]() {
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
if (TE.get() == E)
break;
if (TE->isAltShuffle() &&
((TE->getOpcode() == E->getOpcode() &&
TE->getAltOpcode() == E->getAltOpcode()) ||
(TE->getOpcode() == E->getAltOpcode() &&
TE->getAltOpcode() == E->getOpcode())) &&
TE->hasEqualOperands(*E))
return true;
}
return false;
};
auto GetScalarCost = [&](unsigned Idx) {
auto *VI = cast<Instruction>(UniqueValues[Idx]);
assert(E->isOpcodeOrAlt(VI) && "Unexpected main/alternate opcode");
(void)E;
return TTI->getInstructionCost(VI, CostKind);
};
// Need to clear CommonCost since the final shuffle cost is included in
// the vector cost.
auto GetVectorCost = [&, &TTIRef = *TTI](InstructionCost) {
// VecCost is equal to the sum of the cost of creating 2 vectors and the
// cost of creating the shuffle.
InstructionCost VecCost = 0;
if (TryFindNodeWithEqualOperands()) {
LLVM_DEBUG({
dbgs() << "SLP: diamond match for alternate node found.\n";
E->dump();
});
// No need to add new vector costs here since we're going to reuse the
// same main/alternate vector ops; we just do different shuffling.
} else if (Instruction::isBinaryOp(E->getOpcode())) {
VecCost =
TTIRef.getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost +=
TTIRef.getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size());
VecCost = TTIRef.getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
CI0->getPredicate(), CostKind, VL0);
VecCost += TTIRef.getCmpSelInstrCost(
E->getOpcode(), VecTy, MaskTy,
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
E->getAltOp());
} else {
Type *SrcSclTy = E->getMainOp()->getOperand(0)->getType();
auto *SrcTy = getWidenedType(SrcSclTy, VL.size());
if (SrcSclTy->isIntegerTy() && ScalarTy->isIntegerTy()) {
auto SrcIt = MinBWs.find(getOperandEntry(E, 0));
unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
unsigned SrcBWSz =
DL->getTypeSizeInBits(E->getMainOp()->getOperand(0)->getType());
if (SrcIt != MinBWs.end()) {
SrcBWSz = SrcIt->second.first;
SrcSclTy = IntegerType::get(SrcSclTy->getContext(), SrcBWSz);
SrcTy = getWidenedType(SrcSclTy, VL.size());
}
if (BWSz <= SrcBWSz) {
if (BWSz < SrcBWSz)
VecCost =
TTIRef.getCastInstrCost(Instruction::Trunc, VecTy, SrcTy,
TTI::CastContextHint::None, CostKind);
LLVM_DEBUG({
dbgs()
<< "SLP: alternate extension, which should be truncated.\n";
E->dump();
});
return VecCost;
}
}
VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, SrcTy,
TTI::CastContextHint::None, CostKind);
VecCost +=
TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, SrcTy,
TTI::CastContextHint::None, CostKind);
}
SmallVector<int> Mask;
E->buildAltOpShuffleMask(
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
return I->getOpcode() == E->getAltOpcode();
},
Mask);
VecCost += ::getShuffleCost(TTIRef, TargetTransformInfo::SK_PermuteTwoSrc,
FinalVecTy, Mask);
// Patterns like [fadd,fsub] can be combined into a single instruction
// on x86. Reordering them into [fsub,fadd] blocks this pattern. So we
// need to take into account their order when looking for the most used
// order.
unsigned Opcode0 = E->getOpcode();
unsigned Opcode1 = E->getAltOpcode();
SmallBitVector OpcodeMask(getAltInstrMask(E->Scalars, Opcode0, Opcode1));
// If this pattern is supported by the target then we consider the
// order.
if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
InstructionCost AltVecCost = TTIRef.getAltInstrCost(
VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
return AltVecCost < VecCost ? AltVecCost : VecCost;
}
// TODO: Check the reverse order too.
return VecCost;
};
return GetCostDiff(GetScalarCost, GetVectorCost);
}
default:
llvm_unreachable("Unknown instruction");
}
}
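/// Checks whether a tiny tree (of height 1 or 2) can be considered fully
/// vectorizable: its gather nodes must be all-constant, splats, small, or
/// representable as shuffles of extractelements/loads.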
bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
LLVM_DEBUG(dbgs() << "SLP: Check whether the tree with height "
<< VectorizableTree.size() << " is fully vectorizable.\n");
auto &&AreVectorizableGathers = [this](const TreeEntry *TE, unsigned Limit) {
SmallVector<int> Mask;
return TE->isGather() &&
!any_of(TE->Scalars,
[this](Value *V) { return EphValues.contains(V); }) &&
(allConstant(TE->Scalars) || isSplat(TE->Scalars) ||
TE->Scalars.size() < Limit ||
((TE->getOpcode() == Instruction::ExtractElement ||
all_of(TE->Scalars, IsaPred<ExtractElementInst, UndefValue>)) &&
isFixedVectorShuffle(TE->Scalars, Mask)) ||
(TE->isGather() && TE->getOpcode() == Instruction::Load &&
!TE->isAltShuffle()));
};
// We only handle trees of heights 1 and 2.
if (VectorizableTree.size() == 1 &&
(VectorizableTree[0]->State == TreeEntry::Vectorize ||
(ForReduction &&
AreVectorizableGathers(VectorizableTree[0].get(),
VectorizableTree[0]->Scalars.size()) &&
VectorizableTree[0]->getVectorFactor() > 2)))
return true;
if (VectorizableTree.size() != 2)
return false;
// Handle splat and all-constants stores. Also try to vectorize tiny trees
// with second gather nodes if they have fewer scalar operands than the
// initial tree element (it may be profitable to shuffle the second gather)
// or they are extractelements, which form a shuffle.
SmallVector<int> Mask;
if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
AreVectorizableGathers(VectorizableTree[1].get(),
VectorizableTree[0]->Scalars.size()))
return true;
// Gathering cost would be too much for tiny trees.
if (VectorizableTree[0]->isGather() ||
(VectorizableTree[1]->isGather() &&
VectorizableTree[0]->State != TreeEntry::ScatterVectorize &&
VectorizableTree[0]->State != TreeEntry::StridedVectorize))
return false;
return true;
}
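/// Checks whether \p Root heads an or/shift/zext-of-load chain of \p NumElts
/// elements that the backend is expected to fold into a single wide load
/// (load combining).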
static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
TargetTransformInfo *TTI,
bool MustMatchOrInst) {
// Look past the root to find a source value. Arbitrarily follow the
// path through operand 0 of any 'or'. Also, peek through optional
// shift-left-by-multiple-of-8-bits.
Value *ZextLoad = Root;
const APInt *ShAmtC;
bool FoundOr = false;
while (!isa<ConstantExpr>(ZextLoad) &&
(match(ZextLoad, m_Or(m_Value(), m_Value())) ||
(match(ZextLoad, m_Shl(m_Value(), m_APInt(ShAmtC))) &&
ShAmtC->urem(8) == 0))) {
auto *BinOp = cast<BinaryOperator>(ZextLoad);
ZextLoad = BinOp->getOperand(0);
if (BinOp->getOpcode() == Instruction::Or)
FoundOr = true;
}
// Check if the input is an extended load of the required or/shift expression.
Value *Load;
if ((MustMatchOrInst && !FoundOr) || ZextLoad == Root ||
!match(ZextLoad, m_ZExt(m_Value(Load))) || !isa<LoadInst>(Load))
return false;
// Require that the total load bit width is a legal integer type.
// For example, <8 x i8> --> i64 is a legal integer on a 64-bit target.
// But <16 x i8> --> i128 is not, so the backend probably can't reduce it.
Type *SrcTy = Load->getType();
unsigned LoadBitWidth = SrcTy->getIntegerBitWidth() * NumElts;
if (!TTI->isTypeLegal(IntegerType::get(Root->getContext(), LoadBitWidth)))
return false;
// Everything matched - assume that we can fold the whole sequence using
// load combining.
LLVM_DEBUG(dbgs() << "SLP: Assume load combining for tree starting at "
<< *(cast<Instruction>(Root)) << "\n");
return true;
}
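/// Returns true if an Or-reduction over the tree scalars is likely to be
/// folded into a single wide load by load combining in the backend.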
bool BoUpSLP::isLoadCombineReductionCandidate(RecurKind RdxKind) const {
if (RdxKind != RecurKind::Or)
return false;
unsigned NumElts = VectorizableTree[0]->Scalars.size();
Value *FirstReduced = VectorizableTree[0]->Scalars[0];
return isLoadCombineCandidateImpl(FirstReduced, NumElts, TTI,
/* MatchOr */ false);
}
bool BoUpSLP::isLoadCombineCandidate(ArrayRef<Value *> Stores) const {
// Peek through a final sequence of stores and check if all operations are
// likely to be load-combined.
unsigned NumElts = Stores.size();
for (Value *Scalar : Stores) {
Value *X;
if (!match(Scalar, m_Store(m_Value(X), m_Value())) ||
!isLoadCombineCandidateImpl(X, NumElts, TTI, /* MatchOr */ true))
return false;
}
return true;
}
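/// Returns true if the tree should not be vectorized: it is either tiny and
/// not provably fully vectorizable, or consists only of gathers and PHIs.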
bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
// No need to vectorize inserts of gathered values.
if (VectorizableTree.size() == 2 &&
isa<InsertElementInst>(VectorizableTree[0]->Scalars[0]) &&
VectorizableTree[1]->isGather() &&
(VectorizableTree[1]->getVectorFactor() <= 2 ||
!(isSplat(VectorizableTree[1]->Scalars) ||
allConstant(VectorizableTree[1]->Scalars))))
return true;
// If the graph includes only PHI nodes and gathers, it is definitely not
// profitable for the vectorization; we can skip it if the cost threshold is
// at its default. The cost of vectorized PHI nodes is almost always 0 plus
// the cost of gathers/buildvectors.
constexpr int Limit = 4;
if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
!VectorizableTree.empty() &&
all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
return (TE->isGather() &&
TE->getOpcode() != Instruction::ExtractElement &&
count_if(TE->Scalars, IsaPred<ExtractElementInst>) <= Limit) ||
TE->getOpcode() == Instruction::PHI;
}))
return true;
// We can vectorize the tree if its size is greater than or equal to the
// minimum size specified by the MinTreeSize command line option.
if (VectorizableTree.size() >= MinTreeSize)
return false;
// If we have a tiny tree (a tree whose size is less than MinTreeSize), we
// can vectorize it if we can prove it fully vectorizable.
if (isFullyVectorizableTinyTree(ForReduction))
return false;
// Check if any of the gather nodes forms an insertelement buildvector
// somewhere.
bool IsAllowedSingleBVNode =
VectorizableTree.size() > 1 ||
(VectorizableTree.size() == 1 && VectorizableTree.front()->getOpcode() &&
!VectorizableTree.front()->isAltShuffle() &&
VectorizableTree.front()->getOpcode() != Instruction::PHI &&
VectorizableTree.front()->getOpcode() != Instruction::GetElementPtr &&
allSameBlock(VectorizableTree.front()->Scalars));
if (any_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
return TE->isGather() && all_of(TE->Scalars, [&](Value *V) {
return isa<ExtractElementInst, UndefValue>(V) ||
(IsAllowedSingleBVNode &&
!V->hasNUsesOrMore(UsesLimit) &&
any_of(V->users(), IsaPred<InsertElementInst>));
});
}))
return false;
assert(VectorizableTree.empty()
? ExternalUses.empty()
: true && "We shouldn't have any external users");
// Otherwise, we can't vectorize the tree. It is both tiny and not fully
// vectorizable.
return true;
}
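/// Estimates the cost of spilling and refilling vectorized values that are
/// live across calls which are not part of the tree.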
InstructionCost BoUpSLP::getSpillCost() const {
// Walk from the bottom of the tree to the top, tracking which values are
// live. When we see a call instruction that is not part of our tree,
// query TTI to see if there is a cost to keeping values live over it
// (for example, if spills and fills are required).
unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
InstructionCost Cost = 0;
SmallPtrSet<Instruction *, 4> LiveValues;
Instruction *PrevInst = nullptr;
// The entries in VectorizableTree are not necessarily ordered by their
// position in basic blocks. Collect them and order them by dominance so later
// instructions are guaranteed to be visited first. For instructions in
// different basic blocks, we only scan to the beginning of the block, so
// their order does not matter, as long as all instructions in a basic block
// are grouped together. Using dominance ensures a deterministic order.
SmallVector<Instruction *, 16> OrderedScalars;
for (const auto &TEPtr : VectorizableTree) {
if (TEPtr->State != TreeEntry::Vectorize)
continue;
Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
if (!Inst)
continue;
OrderedScalars.push_back(Inst);
}
llvm::sort(OrderedScalars, [&](Instruction *A, Instruction *B) {
auto *NodeA = DT->getNode(A->getParent());
auto *NodeB = DT->getNode(B->getParent());
assert(NodeA && "Should only process reachable instructions");
assert(NodeB && "Should only process reachable instructions");
assert((NodeA == NodeB) == (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeA != NodeB)
return NodeA->getDFSNumIn() > NodeB->getDFSNumIn();
return B->comesBefore(A);
});
for (Instruction *Inst : OrderedScalars) {
if (!PrevInst) {
PrevInst = Inst;
continue;
}
// Update LiveValues.
LiveValues.erase(PrevInst);
for (auto &J : PrevInst->operands()) {
if (isa<Instruction>(&*J) && getTreeEntry(&*J))
LiveValues.insert(cast<Instruction>(&*J));
}
LLVM_DEBUG({
dbgs() << "SLP: #LV: " << LiveValues.size();
for (auto *X : LiveValues)
dbgs() << " " << X->getName();
dbgs() << ", Looking at ";
Inst->dump();
});
// Now find the sequence of instructions between PrevInst and Inst.
unsigned NumCalls = 0;
BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
PrevInstIt =
PrevInst->getIterator().getReverse();
while (InstIt != PrevInstIt) {
if (PrevInstIt == PrevInst->getParent()->rend()) {
PrevInstIt = Inst->getParent()->rbegin();
continue;
}
auto NoCallIntrinsic = [this](Instruction *I) {
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
if (II->isAssumeLikeIntrinsic())
return true;
FastMathFlags FMF;
SmallVector<Type *, 4> Tys;
for (auto &ArgOp : II->args())
Tys.push_back(ArgOp->getType());
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
FMF = FPMO->getFastMathFlags();
IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys,
FMF);
InstructionCost IntrCost =
TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
InstructionCost CallCost = TTI->getCallInstrCost(
nullptr, II->getType(), Tys, TTI::TCK_RecipThroughput);
if (IntrCost < CallCost)
return true;
}
return false;
};
// Debug information does not impact spill cost.
if (isa<CallBase>(&*PrevInstIt) && !NoCallIntrinsic(&*PrevInstIt) &&
&*PrevInstIt != PrevInst)
NumCalls++;
++PrevInstIt;
}
if (NumCalls) {
SmallVector<Type *, 4> V;
for (auto *II : LiveValues) {
auto *ScalarTy = II->getType();
if (auto *VectorTy = dyn_cast<FixedVectorType>(ScalarTy))
ScalarTy = VectorTy->getElementType();
V.push_back(getWidenedType(ScalarTy, BundleWidth));
}
Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
}
PrevInst = Inst;
}
return Cost;
}
/// Checks if the \p IE1 instruction is followed by the \p IE2 instruction in
/// the buildvector sequence.
static bool isFirstInsertElement(const InsertElementInst *IE1,
const InsertElementInst *IE2) {
if (IE1 == IE2)
return false;
const auto *I1 = IE1;
const auto *I2 = IE2;
const InsertElementInst *PrevI1;
const InsertElementInst *PrevI2;
unsigned Idx1 = *getElementIndex(IE1);
unsigned Idx2 = *getElementIndex(IE2);
do {
if (I2 == IE1)
return true;
if (I1 == IE2)
return false;
PrevI1 = I1;
PrevI2 = I2;
if (I1 && (I1 == IE1 || I1->hasOneUse()) &&
getElementIndex(I1).value_or(Idx2) != Idx2)
I1 = dyn_cast<InsertElementInst>(I1->getOperand(0));
if (I2 && ((I2 == IE2 || I2->hasOneUse())) &&
getElementIndex(I2).value_or(Idx1) != Idx1)
I2 = dyn_cast<InsertElementInst>(I2->getOperand(0));
} while ((I1 && PrevI1 != I1) || (I2 && PrevI2 != I2));
llvm_unreachable("Two different buildvectors not expected.");
}
namespace {
/// Returns the incoming Value * if the requested type is Value * too, or a
/// default-constructed value otherwise.
struct ValueSelect {
template <typename U>
static std::enable_if_t<std::is_same_v<Value *, U>, Value *> get(Value *V) {
return V;
}
template <typename U>
static std::enable_if_t<!std::is_same_v<Value *, U>, U> get(Value *) {
return U();
}
};
} // namespace
/// Analyzes the provided shuffle masks and performs the requested actions on
/// the vectors with the given shuffle masks. It proceeds in several steps.
/// 1. If the Base vector is not an undef vector, resize the very first mask to
/// have a common VF and perform the action for 2 input vectors (including the
/// non-undef Base). Other shuffle masks are combined with the result of the
/// first stage and processed as a shuffle of 2 vectors.
/// 2. If the Base is an undef vector and there is only 1 shuffle mask, perform
/// the action only for 1 vector with the given mask, if it is not the identity
/// mask.
/// 3. If > 2 masks are used, perform the remaining shuffle actions for 2
/// vectors, combining the masks properly between the steps.
template <typename T>
static T *performExtractsShuffleAction(
MutableArrayRef<std::pair<T *, SmallVector<int>>> ShuffleMask, Value *Base,
function_ref<unsigned(T *)> GetVF,
function_ref<std::pair<T *, bool>(T *, ArrayRef<int>, bool)> ResizeAction,
function_ref<T *(ArrayRef<int>, ArrayRef<T *>)> Action) {
assert(!ShuffleMask.empty() && "Empty list of shuffles for inserts.");
SmallVector<int> Mask(ShuffleMask.begin()->second);
auto VMIt = std::next(ShuffleMask.begin());
T *Prev = nullptr;
SmallBitVector UseMask =
buildUseMask(Mask.size(), Mask, UseMask::UndefsAsMask);
SmallBitVector IsBaseUndef = isUndefVector(Base, UseMask);
if (!IsBaseUndef.all()) {
// Base is not undef, need to combine it with the next subvectors.
std::pair<T *, bool> Res =
ResizeAction(ShuffleMask.begin()->first, Mask, /*ForSingleMask=*/false);
SmallBitVector IsBasePoison = isUndefVector<true>(Base, UseMask);
for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
if (Mask[Idx] == PoisonMaskElem)
Mask[Idx] = IsBasePoison.test(Idx) ? PoisonMaskElem : Idx;
else
Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
}
auto *V = ValueSelect::get<T *>(Base);
(void)V;
assert((!V || GetVF(V) == Mask.size()) &&
"Expected base vector of VF number of elements.");
Prev = Action(Mask, {nullptr, Res.first});
} else if (ShuffleMask.size() == 1) {
// Base is undef and only 1 vector is shuffled - perform the action only
// for a single vector, if the mask is not the identity mask.
std::pair<T *, bool> Res = ResizeAction(ShuffleMask.begin()->first, Mask,
/*ForSingleMask=*/true);
if (Res.second)
// Identity mask is found.
Prev = Res.first;
else
Prev = Action(Mask, {ShuffleMask.begin()->first});
} else {
// Base is undef and at least 2 input vectors are shuffled - perform
// shuffles of 2 vectors step by step, combining the shuffles between the
// steps.
unsigned Vec1VF = GetVF(ShuffleMask.begin()->first);
unsigned Vec2VF = GetVF(VMIt->first);
if (Vec1VF == Vec2VF) {
// No need to resize the input vectors since they are of the same size;
// we can shuffle them directly.
ArrayRef<int> SecMask = VMIt->second;
for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
if (SecMask[I] != PoisonMaskElem) {
assert(Mask[I] == PoisonMaskElem && "Multiple uses of scalars.");
Mask[I] = SecMask[I] + Vec1VF;
}
}
Prev = Action(Mask, {ShuffleMask.begin()->first, VMIt->first});
} else {
// Vectors of different sizes - resize and reshuffle.
std::pair<T *, bool> Res1 = ResizeAction(ShuffleMask.begin()->first, Mask,
/*ForSingleMask=*/false);
std::pair<T *, bool> Res2 =
ResizeAction(VMIt->first, VMIt->second, /*ForSingleMask=*/false);
ArrayRef<int> SecMask = VMIt->second;
for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
if (Mask[I] != PoisonMaskElem) {
assert(SecMask[I] == PoisonMaskElem && "Multiple uses of scalars.");
if (Res1.second)
Mask[I] = I;
} else if (SecMask[I] != PoisonMaskElem) {
assert(Mask[I] == PoisonMaskElem && "Multiple uses of scalars.");
Mask[I] = (Res2.second ? I : SecMask[I]) + VF;
}
}
Prev = Action(Mask, {Res1.first, Res2.first});
}
VMIt = std::next(VMIt);
}
bool IsBaseNotUndef = !IsBaseUndef.all();
(void)IsBaseNotUndef;
// Perform requested actions for the remaining masks/vectors.
for (auto E = ShuffleMask.end(); VMIt != E; ++VMIt) {
// Shuffle other input vectors, if any.
std::pair<T *, bool> Res =
ResizeAction(VMIt->first, VMIt->second, /*ForSingleMask=*/false);
ArrayRef<int> SecMask = VMIt->second;
for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
if (SecMask[I] != PoisonMaskElem) {
assert((Mask[I] == PoisonMaskElem || IsBaseNotUndef) &&
"Multiple uses of scalars.");
Mask[I] = (Res.second ? I : SecMask[I]) + VF;
} else if (Mask[I] != PoisonMaskElem) {
Mask[I] = I;
}
}
Prev = Action(Mask, {Prev, Res.first});
}
return Prev;
}
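/// Calculates the final vectorization cost of the whole tree: the sum of the
/// per-entry costs plus the cost of extracting the externally used scalars.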
InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost Cost = 0;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
<< VectorizableTree.size() << ".\n");
unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
SmallPtrSet<Value *, 4> CheckedExtracts;
for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
TreeEntry &TE = *VectorizableTree[I];
if (TE.isGather()) {
if (const TreeEntry *E = getTreeEntry(TE.getMainOp());
E && E->getVectorFactor() == TE.getVectorFactor() &&
E->isSame(TE.Scalars)) {
// Some gather nodes might turn out to be identical to some vectorizable
// nodes after reordering; handle that case here.
LLVM_DEBUG(dbgs() << "SLP: Adding cost 0 for bundle "
<< shortBundleName(TE.Scalars) << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
continue;
}
}
InstructionCost C = getEntryCost(&TE, VectorizedVals, CheckedExtracts);
Cost += C;
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle "
<< shortBundleName(TE.Scalars) << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
}
SmallPtrSet<Value *, 16> ExtractCostCalculated;
InstructionCost ExtractCost = 0;
SmallVector<MapVector<const TreeEntry *, SmallVector<int>>> ShuffleMasks;
SmallVector<std::pair<Value *, const TreeEntry *>> FirstUsers;
SmallVector<APInt> DemandedElts;
SmallDenseSet<Value *, 4> UsedInserts;
DenseSet<std::pair<const TreeEntry *, Type *>> VectorCasts;
std::optional<DenseMap<Value *, unsigned>> ValueToExtUses;
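// Estimate the cost of extracting each externally used scalar from its
// vectorized value. Scalars feeding insertelement build vectors are instead
// recorded in the shuffle masks below and costed as final shuffles.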
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
!ExtractCostCalculated.insert(EU.Scalar).second)
continue;
// Uses by ephemeral values are free (because the ephemeral value will be
// removed prior to code generation, and so the extraction will be
// removed as well).
if (EphValues.count(EU.User))
continue;
// No extract cost for vector "scalar"
if (isa<FixedVectorType>(EU.Scalar->getType()))
continue;
// If the found user is an insertelement, do not calculate the extract cost
// but try to detect it as a final shuffled/identity match.
if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User);
VU && VU->getOperand(1) == EU.Scalar) {
if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
if (!UsedInserts.insert(VU).second)
continue;
std::optional<unsigned> InsertIdx = getElementIndex(VU);
if (InsertIdx) {
const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
auto *It = find_if(
FirstUsers,
[this, VU](const std::pair<Value *, const TreeEntry *> &Pair) {
return areTwoInsertFromSameBuildVector(
VU, cast<InsertElementInst>(Pair.first),
[this](InsertElementInst *II) -> Value * {
Value *Op0 = II->getOperand(0);
if (getTreeEntry(II) && !getTreeEntry(Op0))
return nullptr;
return Op0;
});
});
int VecId = -1;
if (It == FirstUsers.end()) {
(void)ShuffleMasks.emplace_back();
SmallVectorImpl<int> &Mask = ShuffleMasks.back()[ScalarTE];
if (Mask.empty())
Mask.assign(FTy->getNumElements(), PoisonMaskElem);
// Find the insertelement chain, vectorized in the tree, if any.
Value *Base = VU;
while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
if (IEBase != EU.User &&
(!IEBase->hasOneUse() ||
getElementIndex(IEBase).value_or(*InsertIdx) == *InsertIdx))
break;
// Build the mask for the vectorized insertelement instructions.
if (const TreeEntry *E = getTreeEntry(IEBase)) {
VU = IEBase;
do {
IEBase = cast<InsertElementInst>(Base);
int Idx = *getElementIndex(IEBase);
assert(Mask[Idx] == PoisonMaskElem &&
"InsertElementInstruction used already.");
Mask[Idx] = Idx;
Base = IEBase->getOperand(0);
} while (E == getTreeEntry(Base));
break;
}
Base = cast<InsertElementInst>(Base)->getOperand(0);
}
FirstUsers.emplace_back(VU, ScalarTE);
DemandedElts.push_back(APInt::getZero(FTy->getNumElements()));
VecId = FirstUsers.size() - 1;
auto It = MinBWs.find(ScalarTE);
if (It != MinBWs.end() &&
VectorCasts
.insert(std::make_pair(ScalarTE, FTy->getElementType()))
.second) {
unsigned BWSz = It->second.first;
unsigned DstBWSz = DL->getTypeSizeInBits(FTy->getElementType());
unsigned VecOpcode;
if (DstBWSz < BWSz)
VecOpcode = Instruction::Trunc;
else
VecOpcode =
It->second.second ? Instruction::SExt : Instruction::ZExt;
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost C = TTI->getCastInstrCost(
VecOpcode, FTy,
getWidenedType(IntegerType::get(FTy->getContext(), BWSz),
FTy->getNumElements()),
TTI::CastContextHint::None, CostKind);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for extending externally used vector with "
"non-equal minimum bitwidth.\n");
Cost += C;
}
} else {
if (isFirstInsertElement(VU, cast<InsertElementInst>(It->first)))
It->first = VU;
VecId = std::distance(FirstUsers.begin(), It);
}
int InIdx = *InsertIdx;
SmallVectorImpl<int> &Mask = ShuffleMasks[VecId][ScalarTE];
if (Mask.empty())
Mask.assign(FTy->getNumElements(), PoisonMaskElem);
Mask[InIdx] = EU.Lane;
DemandedElts[VecId].setBit(InIdx);
continue;
}
}
}
// Leave the GEPs as-is; they are free in most cases, and it is better to
// keep them as GEPs.
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
if (auto *GEP = dyn_cast<GetElementPtrInst>(EU.Scalar)) {
if (!ValueToExtUses) {
ValueToExtUses.emplace();
for_each(enumerate(ExternalUses), [&](const auto &P) {
ValueToExtUses->try_emplace(P.value().Scalar, P.index());
});
}
// The original GEP can be used if none of its operands are vectorized or
// they are already marked as externally used.
bool CanBeUsedAsGEP = all_of(GEP->operands(), [&](Value *V) {
if (!getTreeEntry(V))
return true;
auto It = ValueToExtUses->find(V);
if (It != ValueToExtUses->end()) {
// Replace all uses to avoid a compiler crash.
ExternalUses[It->second].User = nullptr;
return true;
}
return false;
});
if (CanBeUsedAsGEP) {
ExtractCost += TTI->getInstructionCost(GEP, CostKind);
ExternalUsesAsGEPs.insert(EU.Scalar);
continue;
}
}
// If we plan to rewrite the tree in a smaller type, we will need to sign
// extend the extracted value back to the original type. Here, we account
// for the extract and the added cost of the sign extend if needed.
auto *VecTy = getWidenedType(EU.Scalar->getType(), BundleWidth);
auto It = MinBWs.find(getTreeEntry(EU.Scalar));
if (It != MinBWs.end()) {
auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
unsigned Extend =
It->second.second ? Instruction::SExt : Instruction::ZExt;
VecTy = getWidenedType(MinTy, BundleWidth);
ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
VecTy, EU.Lane);
} else {
ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
CostKind, EU.Lane);
}
}
// Add reduced value cost, if resized.
if (!VectorizedVals.empty()) {
const TreeEntry &Root = *VectorizableTree.front();
auto BWIt = MinBWs.find(&Root);
if (BWIt != MinBWs.end()) {
Type *DstTy = Root.Scalars.front()->getType();
unsigned OriginalSz = DL->getTypeSizeInBits(DstTy);
unsigned SrcSz =
ReductionBitWidth == 0 ? BWIt->second.first : ReductionBitWidth;
if (OriginalSz != SrcSz) {
unsigned Opcode = Instruction::Trunc;
if (OriginalSz > SrcSz)
Opcode = BWIt->second.second ? Instruction::SExt : Instruction::ZExt;
Type *SrcTy = IntegerType::get(DstTy->getContext(), SrcSz);
Cost += TTI->getCastInstrCost(Opcode, DstTy, SrcTy,
TTI::CastContextHint::None,
TTI::TCK_RecipThroughput);
}
}
}
InstructionCost SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
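// Accounts for resizing a tree entry's vector to the mask's width: if the
// mask is not a simple identity, the cost of a single-source permute is
// added and the entry is reported as resized.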
auto &&ResizeToVF = [this, &Cost](const TreeEntry *TE, ArrayRef<int> Mask,
bool) {
InstructionCost C = 0;
unsigned VF = Mask.size();
unsigned VecVF = TE->getVectorFactor();
if (VF != VecVF &&
(any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); }) ||
!ShuffleVectorInst::isIdentityMask(Mask, VF))) {
SmallVector<int> OrigMask(VecVF, PoisonMaskElem);
std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
OrigMask.begin());
C = TTI->getShuffleCost(TTI::SK_PermuteSingleSrc,
getWidenedType(TE->getMainOp()->getType(), VecVF),
OrigMask);
LLVM_DEBUG(
dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of insertelement external users.\n";
TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n");
Cost += C;
return std::make_pair(TE, true);
}
return std::make_pair(TE, false);
};
// Calculate the cost of the reshuffled vectors, if any.
for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
Value *Base = cast<Instruction>(FirstUsers[I].first)->getOperand(0);
auto Vector = ShuffleMasks[I].takeVector();
unsigned VF = 0;
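// Estimates the cost of the final shuffles that combine 1 or 2 tree
// entries for the external insertelement users.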
auto EstimateShufflesCost = [&](ArrayRef<int> Mask,
ArrayRef<const TreeEntry *> TEs) {
assert((TEs.size() == 1 || TEs.size() == 2) &&
"Expected exactly 1 or 2 tree entries.");
if (TEs.size() == 1) {
if (VF == 0)
VF = TEs.front()->getVectorFactor();
auto *FTy = getWidenedType(TEs.back()->Scalars.front()->getType(), VF);
if (!ShuffleVectorInst::isIdentityMask(Mask, VF) &&
!all_of(enumerate(Mask), [=](const auto &Data) {
return Data.value() == PoisonMaskElem ||
(Data.index() < VF &&
static_cast<int>(Data.index()) == Data.value());
})) {
InstructionCost C =
TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of insertelement "
"external users.\n";
TEs.front()->dump();
dbgs() << "SLP: Current total cost = " << Cost << "\n");
Cost += C;
}
} else {
if (VF == 0) {
if (TEs.front() &&
TEs.front()->getVectorFactor() == TEs.back()->getVectorFactor())
VF = TEs.front()->getVectorFactor();
else
VF = Mask.size();
}
auto *FTy = getWidenedType(TEs.back()->Scalars.front()->getType(), VF);
InstructionCost C =
::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, FTy, Mask);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of vector node and external "
"insertelement users.\n";
if (TEs.front()) { TEs.front()->dump(); } TEs.back()->dump();
dbgs() << "SLP: Current total cost = " << Cost << "\n");
Cost += C;
}
VF = Mask.size();
return TEs.back();
};
(void)performExtractsShuffleAction<const TreeEntry>(
MutableArrayRef(Vector.data(), Vector.size()), Base,
[](const TreeEntry *E) { return E->getVectorFactor(); }, ResizeToVF,
EstimateShufflesCost);
InstructionCost InsertCost = TTI->getScalarizationOverhead(
cast<FixedVectorType>(FirstUsers[I].first->getType()), DemandedElts[I],
/*Insert*/ true, /*Extract*/ false, TTI::TCK_RecipThroughput);
Cost -= InsertCost;
}
// Add the cost for reduced value resize (if required).
if (ReductionBitWidth != 0) {
assert(UserIgnoreList && "Expected reduction tree.");
const TreeEntry &E = *VectorizableTree.front();
auto It = MinBWs.find(&E);
if (It != MinBWs.end() && It->second.first != ReductionBitWidth) {
unsigned SrcSize = It->second.first;
unsigned DstSize = ReductionBitWidth;
unsigned Opcode = Instruction::Trunc;
if (SrcSize < DstSize)
Opcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
auto *SrcVecTy =
getWidenedType(Builder.getIntNTy(SrcSize), E.getVectorFactor());
auto *DstVecTy =
getWidenedType(Builder.getIntNTy(DstSize), E.getVectorFactor());
TTI::CastContextHint CCH = getCastContextHint(E);
InstructionCost CastCost;
switch (E.getOpcode()) {
case Instruction::SExt:
case Instruction::ZExt:
case Instruction::Trunc: {
const TreeEntry *OpTE = getOperandEntry(&E, 0);
CCH = getCastContextHint(*OpTE);
break;
}
default:
break;
}
CastCost += TTI->getCastInstrCost(Opcode, DstVecTy, SrcVecTy, CCH,
TTI::TCK_RecipThroughput);
Cost += CastCost;
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << CastCost
<< " for final resize for reduction from " << SrcVecTy
<< " to " << DstVecTy << "\n";
dbgs() << "SLP: Current total cost = " << Cost << "\n");
}
}
#ifndef NDEBUG
SmallString<256> Str;
{
raw_svector_ostream OS(Str);
OS << "SLP: Spill Cost = " << SpillCost << ".\n"
<< "SLP: Extract Cost = " << ExtractCost << ".\n"
<< "SLP: Total Cost = " << Cost << ".\n";
}
LLVM_DEBUG(dbgs() << Str);
if (ViewSLPTree)
ViewGraph(this, "SLP" + F->getName(), false, Str);
#endif
return Cost;
}
/// Tries to find extractelement instructions with constant indices from fixed
/// vector type and gather such instructions into a bunch, which highly likely
/// might be detected as a shuffle of 1 or 2 input vectors. If this attempt was
/// successful, the matched scalars are replaced by poison values in \p VL for
/// future analysis.
std::optional<TTI::ShuffleKind>
BoUpSLP::tryToGatherSingleRegisterExtractElements(
MutableArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) const {
// Scan list of gathered scalars for extractelements that can be represented
// as shuffles.
MapVector<Value *, SmallVector<int>> VectorOpToIdx;
SmallVector<int> UndefVectorExtracts;
for (int I = 0, E = VL.size(); I < E; ++I) {
auto *EI = dyn_cast<ExtractElementInst>(VL[I]);
if (!EI) {
if (isa<UndefValue>(VL[I]))
UndefVectorExtracts.push_back(I);
continue;
}
auto *VecTy = dyn_cast<FixedVectorType>(EI->getVectorOperandType());
if (!VecTy || !isa<ConstantInt, UndefValue>(EI->getIndexOperand()))
continue;
std::optional<unsigned> Idx = getExtractIndex(EI);
// Undefined index.
if (!Idx) {
UndefVectorExtracts.push_back(I);
continue;
}
SmallBitVector ExtractMask(VecTy->getNumElements(), true);
ExtractMask.reset(*Idx);
if (isUndefVector(EI->getVectorOperand(), ExtractMask).all()) {
UndefVectorExtracts.push_back(I);
continue;
}
VectorOpToIdx[EI->getVectorOperand()].push_back(I);
}
// Sort the vector operands by the maximum number of uses in extractelements.
SmallVector<std::pair<Value *, SmallVector<int>>> Vectors =
VectorOpToIdx.takeVector();
stable_sort(Vectors, [](const auto &P1, const auto &P2) {
return P1.second.size() > P2.second.size();
});
// Find the best pair of the vectors or a single vector.
const int UndefSz = UndefVectorExtracts.size();
unsigned SingleMax = 0;
unsigned PairMax = 0;
if (!Vectors.empty()) {
SingleMax = Vectors.front().second.size() + UndefSz;
if (Vectors.size() > 1) {
auto *ItNext = std::next(Vectors.begin());
PairMax = SingleMax + ItNext->second.size();
}
}
if (SingleMax == 0 && PairMax == 0 && UndefSz == 0)
return std::nullopt;
// Check whether it is better to perform a shuffle of 2 vectors or just of
// a single vector.
SmallVector<Value *> SavedVL(VL.begin(), VL.end());
SmallVector<Value *> GatheredExtracts(
VL.size(), PoisonValue::get(VL.front()->getType()));
if (SingleMax >= PairMax && SingleMax) {
for (int Idx : Vectors.front().second)
std::swap(GatheredExtracts[Idx], VL[Idx]);
} else if (!Vectors.empty()) {
for (unsigned VecIdx : {0, 1})
for (int Idx : Vectors[VecIdx].second)
std::swap(GatheredExtracts[Idx], VL[Idx]);
}
// Add extracts from undefs too.
for (int Idx : UndefVectorExtracts)
std::swap(GatheredExtracts[Idx], VL[Idx]);
// Check that the gather of extractelements can be represented as just a
// shuffle of one or two vectors from which the scalars are extracted.
std::optional<TTI::ShuffleKind> Res =
isFixedVectorShuffle(GatheredExtracts, Mask);
if (!Res) {
// TODO: try to check other subsets if possible.
// Restore the original VL if the attempt was not successful.
copy(SavedVL, VL.begin());
return std::nullopt;
}
// Restore unused scalars from mask, if some of the extractelements were not
// selected for shuffle.
for (int I = 0, E = GatheredExtracts.size(); I < E; ++I) {
if (Mask[I] == PoisonMaskElem && !isa<PoisonValue>(GatheredExtracts[I]) &&
isa<UndefValue>(GatheredExtracts[I])) {
std::swap(VL[I], GatheredExtracts[I]);
continue;
}
auto *EI = dyn_cast<ExtractElementInst>(VL[I]);
if (!EI || !isa<FixedVectorType>(EI->getVectorOperandType()) ||
!isa<ConstantInt, UndefValue>(EI->getIndexOperand()) ||
is_contained(UndefVectorExtracts, I))
continue;
}
return Res;
}
/// Tries to find extractelement instructions with constant indices from fixed
/// vector type and gather such instructions into a bunch, which highly likely
/// might be detected as a shuffle of 1 or 2 input vectors. If this attempt was
/// successful, the matched scalars are replaced by poison values in \p VL for
/// future analysis.
SmallVector<std::optional<TTI::ShuffleKind>>
BoUpSLP::tryToGatherExtractElements(SmallVectorImpl<Value *> &VL,
SmallVectorImpl<int> &Mask,
unsigned NumParts) const {
assert(NumParts > 0 && "NumParts expected be greater than or equal to 1.");
SmallVector<std::optional<TTI::ShuffleKind>> ShufflesRes(NumParts);
Mask.assign(VL.size(), PoisonMaskElem);
unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
for (unsigned Part : seq<unsigned>(NumParts)) {
// Scan list of gathered scalars for extractelements that can be represented
// as shuffles.
MutableArrayRef<Value *> SubVL = MutableArrayRef(VL).slice(
Part * SliceSize, getNumElems(VL.size(), SliceSize, Part));
SmallVector<int> SubMask;
std::optional<TTI::ShuffleKind> Res =
tryToGatherSingleRegisterExtractElements(SubVL, SubMask);
ShufflesRes[Part] = Res;
copy(SubMask, std::next(Mask.begin(), Part * SliceSize));
}
if (none_of(ShufflesRes, [](const std::optional<TTI::ShuffleKind> &Res) {
return Res.has_value();
}))
ShufflesRes.clear();
return ShufflesRes;
}
std::optional<TargetTransformInfo::ShuffleKind>
BoUpSLP::isGatherShuffledSingleRegisterEntry(
const TreeEntry *TE, ArrayRef<Value *> VL, MutableArrayRef<int> Mask,
SmallVectorImpl<const TreeEntry *> &Entries, unsigned Part, bool ForOrder) {
Entries.clear();
// TODO: currently checking only for Scalars in the tree entry, need to count
// reused elements too for better cost estimation.
const EdgeInfo &TEUseEI = TE->UserTreeIndices.front();
const Instruction *TEInsertPt = &getLastInstructionInBundle(TEUseEI.UserTE);
const BasicBlock *TEInsertBlock = nullptr;
// Main node of PHI entries keeps the correct order of operands/incoming
// blocks.
if (auto *PHI = dyn_cast<PHINode>(TEUseEI.UserTE->getMainOp())) {
TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx);
TEInsertPt = TEInsertBlock->getTerminator();
} else {
TEInsertBlock = TEInsertPt->getParent();
}
if (!DT->isReachableFromEntry(TEInsertBlock))
return std::nullopt;
auto *NodeUI = DT->getNode(TEInsertBlock);
assert(NodeUI && "Should only process reachable instructions");
SmallPtrSet<Value *, 4> GatheredScalars(VL.begin(), VL.end());
auto CheckOrdering = [&](const Instruction *InsertPt) {
// Argument InsertPt is an instruction where vector code for some other
// tree entry (one that shares one or more scalars with TE) is going to be
// generated. This lambda returns true if the insertion point of vector code
// for the TE dominates that point (otherwise the dependency is the other way
// around). The other node is not limited to be of a gather kind. Gather
// nodes are not scheduled and their vector code is inserted before their
// first user. If the user is a PHI, that is supposed to be at the end of a
// predecessor block. Otherwise it is the last instruction among scalars of
// the user node. So, instead of checking dependency between instructions
// themselves, we check dependency between their insertion points for vector
// code (since each scalar instruction ends up as a lane of a vector
// instruction).
const BasicBlock *InsertBlock = InsertPt->getParent();
auto *NodeEUI = DT->getNode(InsertBlock);
if (!NodeEUI)
return false;
assert((NodeUI == NodeEUI) ==
(NodeUI->getDFSNumIn() == NodeEUI->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
// Check the order of the gather nodes users.
if (TEInsertPt->getParent() != InsertBlock &&
(DT->dominates(NodeUI, NodeEUI) || !DT->dominates(NodeEUI, NodeUI)))
return false;
if (TEInsertPt->getParent() == InsertBlock &&
TEInsertPt->comesBefore(InsertPt))
return false;
return true;
};
// Find all tree entries used by the gathered values. If no common entries
// are found - not a shuffle.
// Here we build a set of tree nodes for each gathered value and try to
// find the intersection between these sets. If we have at least one common
// tree node for each gathered value - we have just a permutation of a
// single vector. If we have 2 different sets, we're in a situation where we
// have a permutation of 2 input vectors.
SmallVector<SmallPtrSet<const TreeEntry *, 4>> UsedTEs;
DenseMap<Value *, int> UsedValuesEntry;
for (Value *V : VL) {
if (isConstant(V))
continue;
// Build a list of tree entries where V is used.
SmallPtrSet<const TreeEntry *, 4> VToTEs;
for (const TreeEntry *TEPtr : ValueToGatherNodes.find(V)->second) {
if (TEPtr == TE)
continue;
assert(any_of(TEPtr->Scalars,
[&](Value *V) { return GatheredScalars.contains(V); }) &&
"Must contain at least single gathered value.");
assert(TEPtr->UserTreeIndices.size() == 1 &&
"Expected only single user of a gather node.");
const EdgeInfo &UseEI = TEPtr->UserTreeIndices.front();
PHINode *UserPHI = dyn_cast<PHINode>(UseEI.UserTE->getMainOp());
const Instruction *InsertPt =
UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator()
: &getLastInstructionInBundle(UseEI.UserTE);
if (TEInsertPt == InsertPt) {
// If 2 gathers are operands of the same entry (regardless of whether
// the user is a PHI or not), compare operand indices and use the earlier
// one as the base.
if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx)
continue;
// If the user instruction is used in several different vectorized
// nodes - make the choice depend on the node index.
if (TEUseEI.UserTE != UseEI.UserTE &&
TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
continue;
}
// Check if the user node of the TE comes after the user node of TEPtr;
// otherwise TEPtr depends on TE.
if ((TEInsertBlock != InsertPt->getParent() ||
TEUseEI.EdgeIdx < UseEI.EdgeIdx || TEUseEI.UserTE != UseEI.UserTE) &&
!CheckOrdering(InsertPt))
continue;
VToTEs.insert(TEPtr);
}
if (const TreeEntry *VTE = getTreeEntry(V)) {
if (ForOrder) {
if (VTE->State != TreeEntry::Vectorize) {
auto It = MultiNodeScalars.find(V);
if (It == MultiNodeScalars.end())
continue;
VTE = *It->getSecond().begin();
// Iterate through all vectorized nodes.
auto *MIt = find_if(It->getSecond(), [](const TreeEntry *MTE) {
return MTE->State == TreeEntry::Vectorize;
});
if (MIt == It->getSecond().end())
continue;
VTE = *MIt;
}
}
Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst))
continue;
VToTEs.insert(VTE);
}
if (VToTEs.empty())
continue;
if (UsedTEs.empty()) {
// On the first iteration, just insert the list of nodes into the vector.
UsedTEs.push_back(VToTEs);
UsedValuesEntry.try_emplace(V, 0);
} else {
// Need to check if there are any previously used tree nodes which use V.
// If there are no such nodes, consider that we have one more input
// vector.
SmallPtrSet<const TreeEntry *, 4> SavedVToTEs(VToTEs);
unsigned Idx = 0;
for (SmallPtrSet<const TreeEntry *, 4> &Set : UsedTEs) {
// Do we have a non-empty intersection of previously listed tree entries
// and tree entries using current V?
set_intersect(VToTEs, Set);
if (!VToTEs.empty()) {
// Yes, write the new subset and continue analysis for the next
// scalar.
Set.swap(VToTEs);
break;
}
VToTEs = SavedVToTEs;
++Idx;
}
// No non-empty intersection found - need to add a second set of possible
// source vectors.
if (Idx == UsedTEs.size()) {
// If the number of input vectors is greater than 2 - not a permutation,
// fall back to the regular gather.
// TODO: support multiple reshuffled nodes.
if (UsedTEs.size() == 2)
continue;
UsedTEs.push_back(SavedVToTEs);
Idx = UsedTEs.size() - 1;
}
UsedValuesEntry.try_emplace(V, Idx);
}
}
if (UsedTEs.empty()) {
Entries.clear();
return std::nullopt;
}
unsigned VF = 0;
if (UsedTEs.size() == 1) {
// Keep the order to avoid non-determinism.
SmallVector<const TreeEntry *> FirstEntries(UsedTEs.front().begin(),
UsedTEs.front().end());
sort(FirstEntries, [](const TreeEntry *TE1, const TreeEntry *TE2) {
return TE1->Idx < TE2->Idx;
});
// First, try to find a perfect match in another gather node.
auto *It = find_if(FirstEntries, [=](const TreeEntry *EntryPtr) {
return EntryPtr->isSame(VL) || EntryPtr->isSame(TE->Scalars);
});
if (It != FirstEntries.end() &&
((*It)->getVectorFactor() == VL.size() ||
((*It)->getVectorFactor() == TE->Scalars.size() &&
TE->ReuseShuffleIndices.size() == VL.size() &&
(*It)->isSame(TE->Scalars)))) {
Entries.push_back(*It);
if ((*It)->getVectorFactor() == VL.size()) {
std::iota(std::next(Mask.begin(), Part * VL.size()),
std::next(Mask.begin(), (Part + 1) * VL.size()), 0);
} else {
SmallVector<int> CommonMask = TE->getCommonMask();
copy(CommonMask, Mask.begin());
}
// Clear undef scalars.
for (int I = 0, Sz = VL.size(); I < Sz; ++I)
if (isa<PoisonValue>(VL[I]))
Mask[I] = PoisonMaskElem;
return TargetTransformInfo::SK_PermuteSingleSrc;
}
// No perfect match, just a shuffle, so choose the first tree node from
// the tree.
Entries.push_back(FirstEntries.front());
} else {
// Try to find nodes with the same vector factor.
assert(UsedTEs.size() == 2 && "Expected at max 2 permuted entries.");
// Keep the order of tree nodes to avoid non-determinism.
DenseMap<int, const TreeEntry *> VFToTE;
for (const TreeEntry *TE : UsedTEs.front()) {
unsigned VF = TE->getVectorFactor();
auto It = VFToTE.find(VF);
if (It != VFToTE.end()) {
if (It->second->Idx > TE->Idx)
It->getSecond() = TE;
continue;
}
VFToTE.try_emplace(VF, TE);
}
// Same, keep the order to avoid non-determinism.
SmallVector<const TreeEntry *> SecondEntries(UsedTEs.back().begin(),
UsedTEs.back().end());
sort(SecondEntries, [](const TreeEntry *TE1, const TreeEntry *TE2) {
return TE1->Idx < TE2->Idx;
});
for (const TreeEntry *TE : SecondEntries) {
auto It = VFToTE.find(TE->getVectorFactor());
if (It != VFToTE.end()) {
VF = It->first;
Entries.push_back(It->second);
Entries.push_back(TE);
break;
}
}
// No 2 source vectors with the same vector factor - just choose the 2
// with the maximum index.
if (Entries.empty()) {
Entries.push_back(*llvm::max_element(
UsedTEs.front(), [](const TreeEntry *TE1, const TreeEntry *TE2) {
return TE1->Idx < TE2->Idx;
}));
Entries.push_back(SecondEntries.front());
VF = std::max(Entries.front()->getVectorFactor(),
Entries.back()->getVectorFactor());
}
}
bool IsSplatOrUndefs = isSplat(VL) || all_of(VL, IsaPred<UndefValue>);
// Checks if the 2 PHIs are compatible, i.e. have a high probability of
// being vectorized together.
auto AreCompatiblePHIs = [&](Value *V, Value *V1) {
auto *PHI = cast<PHINode>(V);
auto *PHI1 = cast<PHINode>(V1);
// Check that all incoming values are compatible/from the same parent (if
// they are instructions).
// The incoming values are compatible if they are all constants, or
// instructions with the same/alternate opcodes from the same basic block.
for (int I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
Value *In = PHI->getIncomingValue(I);
Value *In1 = PHI1->getIncomingValue(I);
if (isConstant(In) && isConstant(In1))
continue;
if (!getSameOpcode({In, In1}, *TLI).getOpcode())
return false;
if (cast<Instruction>(In)->getParent() !=
cast<Instruction>(In1)->getParent())
return false;
}
return true;
};
// Check if the value can be ignored during analysis for shuffled gathers.
// We suppose it is better to ignore instructions which do not form splats,
// are not vectorized/not extractelements (these instructions will be
// handled by extractelements processing) or may form a vector node in the
// future.
auto MightBeIgnored = [=](Value *V) {
auto *I = dyn_cast<Instruction>(V);
return I && !IsSplatOrUndefs && !ScalarToTreeEntry.count(I) &&
!isVectorLikeInstWithConstOps(I) &&
!areAllUsersVectorized(I, UserIgnoreList) && isSimple(I);
};
// Check that the neighbor instruction may form a full vector node with the
// current instruction V. It is possible if they have the same/alternate
// opcode and the same parent basic block.
auto NeighborMightBeIgnored = [&](Value *V, int Idx) {
Value *V1 = VL[Idx];
bool UsedInSameVTE = false;
auto It = UsedValuesEntry.find(V1);
if (It != UsedValuesEntry.end())
UsedInSameVTE = It->second == UsedValuesEntry.find(V)->second;
return V != V1 && MightBeIgnored(V1) && !UsedInSameVTE &&
getSameOpcode({V, V1}, *TLI).getOpcode() &&
cast<Instruction>(V)->getParent() ==
cast<Instruction>(V1)->getParent() &&
(!isa<PHINode>(V1) || AreCompatiblePHIs(V, V1));
};
// Build a shuffle mask for better cost estimation and vector emission.
SmallBitVector UsedIdxs(Entries.size());
SmallVector<std::pair<unsigned, int>> EntryLanes;
for (int I = 0, E = VL.size(); I < E; ++I) {
Value *V = VL[I];
auto It = UsedValuesEntry.find(V);
if (It == UsedValuesEntry.end())
continue;
// Do not try to shuffle scalars if they are constants, or instructions
// that may be vectorized as part of the subsequent vector-build
// vectorization.
if (isConstant(V) || (MightBeIgnored(V) &&
((I > 0 && NeighborMightBeIgnored(V, I - 1)) ||
(I != E - 1 && NeighborMightBeIgnored(V, I + 1)))))
continue;
unsigned Idx = It->second;
EntryLanes.emplace_back(Idx, I);
UsedIdxs.set(Idx);
}
// Iterate through all shuffled scalars and select entries which can be
// used for the final shuffle.
SmallVector<const TreeEntry *> TempEntries;
for (unsigned I = 0, Sz = Entries.size(); I < Sz; ++I) {
if (!UsedIdxs.test(I))
continue;
// Fix the entry number for the given scalar. If it is the first entry, set
// Pair.first to 0, otherwise to 1 (we currently select at most 2 nodes).
// These indices are used as the vector offset when calculating the final
// shuffle mask.
for (std::pair<unsigned, int> &Pair : EntryLanes)
if (Pair.first == I)
Pair.first = TempEntries.size();
TempEntries.push_back(Entries[I]);
}
Entries.swap(TempEntries);
if (EntryLanes.size() == Entries.size() &&
!VL.equals(ArrayRef(TE->Scalars)
.slice(Part * VL.size(),
std::min<int>(VL.size(), TE->Scalars.size())))) {
// We may have only 1 or 2 entries here. If the number of scalars is equal
// to the number of entries, there is no need to do the analysis - it is
// not very profitable. Since VL is not the same as TE->Scalars, we already
// have some shuffles before this point. Cut off the unprofitable case.
Entries.clear();
return std::nullopt;
}
// Build the final mask and check for the identity shuffle, if possible.
bool IsIdentity = Entries.size() == 1;
// Pair.first is the offset to the vector, while Pair.second is the index of
// scalar in the list.
for (const std::pair<unsigned, int> &Pair : EntryLanes) {
unsigned Idx = Part * VL.size() + Pair.second;
Mask[Idx] =
Pair.first * VF +
(ForOrder ? std::distance(
Entries[Pair.first]->Scalars.begin(),
find(Entries[Pair.first]->Scalars, VL[Pair.second]))
: Entries[Pair.first]->findLaneForValue(VL[Pair.second]));
IsIdentity &= Mask[Idx] == Pair.second;
}
switch (Entries.size()) {
case 1:
if (IsIdentity || EntryLanes.size() > 1 || VL.size() <= 2)
return TargetTransformInfo::SK_PermuteSingleSrc;
break;
case 2:
if (EntryLanes.size() > 2 || VL.size() <= 2)
return TargetTransformInfo::SK_PermuteTwoSrc;
break;
default:
break;
}
Entries.clear();
// Clear the corresponding mask elements.
std::fill(std::next(Mask.begin(), Part * VL.size()),
std::next(Mask.begin(), (Part + 1) * VL.size()), PoisonMaskElem);
return std::nullopt;
}
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>>
BoUpSLP::isGatherShuffledEntry(
const TreeEntry *TE, ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask,
SmallVectorImpl<SmallVector<const TreeEntry *>> &Entries, unsigned NumParts,
bool ForOrder) {
assert(NumParts > 0 && NumParts < VL.size() &&
"Expected positive number of registers.");
Entries.clear();
// No need to check for the topmost gather node.
if (TE == VectorizableTree.front().get())
return {};
// FIXME: Gathering for non-power-of-2 nodes not implemented yet.
if (TE->isNonPowOf2Vec())
return {};
Mask.assign(VL.size(), PoisonMaskElem);
assert(TE->UserTreeIndices.size() == 1 &&
"Expected only single user of the gather node.");
assert(VL.size() % NumParts == 0 &&
"Number of scalars must be divisible by NumParts.");
unsigned SliceSize = getPartNumElems(VL.size(), NumParts);
SmallVector<std::optional<TTI::ShuffleKind>> Res;
for (unsigned Part : seq<unsigned>(NumParts)) {
ArrayRef<Value *> SubVL =
VL.slice(Part * SliceSize, getNumElems(VL.size(), SliceSize, Part));
SmallVectorImpl<const TreeEntry *> &SubEntries = Entries.emplace_back();
std::optional<TTI::ShuffleKind> SubRes =
isGatherShuffledSingleRegisterEntry(TE, SubVL, Mask, SubEntries, Part,
ForOrder);
if (!SubRes)
SubEntries.clear();
Res.push_back(SubRes);
if (SubEntries.size() == 1 && *SubRes == TTI::SK_PermuteSingleSrc &&
SubEntries.front()->getVectorFactor() == VL.size() &&
(SubEntries.front()->isSame(TE->Scalars) ||
SubEntries.front()->isSame(VL))) {
SmallVector<const TreeEntry *> LocalSubEntries;
LocalSubEntries.swap(SubEntries);
Entries.clear();
Res.clear();
std::iota(Mask.begin(), Mask.end(), 0);
// Clear undef scalars.
for (int I = 0, Sz = VL.size(); I < Sz; ++I)
if (isa<PoisonValue>(VL[I]))
Mask[I] = PoisonMaskElem;
Entries.emplace_back(1, LocalSubEntries.front());
Res.push_back(TargetTransformInfo::SK_PermuteSingleSrc);
return Res;
}
}
if (all_of(Res,
[](const std::optional<TTI::ShuffleKind> &SK) { return !SK; })) {
Entries.clear();
return {};
}
return Res;
}
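/// Computes the cost of building a vector from the scalars in \p VL: the
/// insertion cost of the unique values, plus a single-source permute if
/// duplicate non-constant values must be replicated into their lanes.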
InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
Type *ScalarTy) const {
auto *VecTy = getWidenedType(ScalarTy, VL.size());
bool DuplicateNonConst = false;
// Find the cost of inserting/extracting values from the vector.
// Check if the same elements are inserted several times and count them as
// shuffle candidates.
APInt ShuffledElements = APInt::getZero(VL.size());
DenseMap<Value *, unsigned> UniqueElements;
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost;
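// Estimates the cost of inserting scalar V into lane I, adding a cast cost
// if the scalar's type does not match ScalarTy.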
auto EstimateInsertCost = [&](unsigned I, Value *V) {
if (V->getType() != ScalarTy) {
Cost += TTI->getCastInstrCost(Instruction::Trunc, ScalarTy, V->getType(),
TTI::CastContextHint::None, CostKind);
V = nullptr;
}
if (!ForPoisonSrc)
Cost +=
TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
I, Constant::getNullValue(VecTy), V);
};
SmallVector<int> ShuffleMask(VL.size(), PoisonMaskElem);
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
Value *V = VL[I];
// No need to shuffle duplicates for constants.
if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
ShuffledElements.setBit(I);
ShuffleMask[I] = isa<PoisonValue>(V) ? PoisonMaskElem : I;
continue;
}
auto Res = UniqueElements.try_emplace(V, I);
if (Res.second) {
EstimateInsertCost(I, V);
ShuffleMask[I] = I;
continue;
}
DuplicateNonConst = true;
ShuffledElements.setBit(I);
ShuffleMask[I] = Res.first->second;
}
if (ForPoisonSrc)
Cost =
TTI->getScalarizationOverhead(VecTy, ~ShuffledElements, /*Insert*/ true,
/*Extract*/ false, CostKind);
if (DuplicateNonConst)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
VecTy, ShuffleMask);
return Cost;
}
// Perform operand reordering on the instructions in VL and return the reordered
// operands in Left and Right.
void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right,
const BoUpSLP &R) {
if (VL.empty())
return;
VLOperands Ops(VL, R);
// Reorder the operands in place.
Ops.reorder();
Left = Ops.getVL(0);
Right = Ops.getVL(1);
}
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
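// Return the cached last instruction for this entry, if it has been
// computed already.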
auto &Res = EntryToLastInstruction.FindAndConstruct(E);
if (Res.second)
return *Res.second;
// Get the basic block this bundle is in. All instructions in the bundle
// should be in this block (except for extractelement-like instructions with
// constant indices).
auto *Front = E->getMainOp();
auto *BB = Front->getParent();
assert(llvm::all_of(E->Scalars, [=](Value *V) -> bool {
if (E->getOpcode() == Instruction::GetElementPtr &&
!isa<GetElementPtrInst>(V))
return true;
auto *I = cast<Instruction>(V);
return !E->isOpcodeOrAlt(I) || I->getParent() == BB ||
isVectorLikeInstWithConstOps(I);
}));
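// Finds the last instruction among the bundle scalars, comparing positions
// within a block directly and using dominator tree DFS-in numbers when the
// scalars span several blocks.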
auto FindLastInst = [&]() {
Instruction *LastInst = Front;
for (Value *V : E->Scalars) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
continue;
if (LastInst->getParent() == I->getParent()) {
if (LastInst->comesBefore(I))
LastInst = I;
continue;
}
assert(((E->getOpcode() == Instruction::GetElementPtr &&
!isa<GetElementPtrInst>(I)) ||
(isVectorLikeInstWithConstOps(LastInst) &&
isVectorLikeInstWithConstOps(I))) &&
"Expected vector-like or non-GEP in GEP node insts only.");
if (!DT->isReachableFromEntry(LastInst->getParent())) {
LastInst = I;
continue;
}
if (!DT->isReachableFromEntry(I->getParent()))
continue;
auto *NodeA = DT->getNode(LastInst->getParent());
auto *NodeB = DT->getNode(I->getParent());
assert(NodeA && "Should only process reachable instructions");
assert(NodeB && "Should only process reachable instructions");
assert((NodeA == NodeB) ==
(NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn())
LastInst = I;
}
BB = LastInst->getParent();
return LastInst;
};
auto FindFirstInst = [&]() {
Instruction *FirstInst = Front;
for (Value *V : E->Scalars) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
continue;
if (FirstInst->getParent() == I->getParent()) {
if (I->comesBefore(FirstInst))
FirstInst = I;
continue;
}
assert(((E->getOpcode() == Instruction::GetElementPtr &&
!isa<GetElementPtrInst>(I)) ||
(isVectorLikeInstWithConstOps(FirstInst) &&
isVectorLikeInstWithConstOps(I))) &&
"Expected vector-like or non-GEP in GEP node insts only.");
if (!DT->isReachableFromEntry(FirstInst->getParent())) {
FirstInst = I;
continue;
}
if (!DT->isReachableFromEntry(I->getParent()))
continue;
auto *NodeA = DT->getNode(FirstInst->getParent());
auto *NodeB = DT->getNode(I->getParent());
assert(NodeA && "Should only process reachable instructions");
assert(NodeB && "Should only process reachable instructions");
assert((NodeA == NodeB) ==
(NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeA->getDFSNumIn() > NodeB->getDFSNumIn())
FirstInst = I;
}
return FirstInst;
};
// Set the insert point to the beginning of the basic block if the entry
// should not be scheduled.
if (doesNotNeedToSchedule(E->Scalars) ||
(!E->isGather() && all_of(E->Scalars, isVectorLikeInstWithConstOps))) {
if ((E->getOpcode() == Instruction::GetElementPtr &&
any_of(E->Scalars,
[](Value *V) {
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
})) ||
all_of(E->Scalars,
[](Value *V) {
return !isVectorLikeInstWithConstOps(V) &&
isUsedOutsideBlock(V);
}) ||
(E->isGather() && E->Idx == 0 && all_of(E->Scalars, [](Value *V) {
return isa<ExtractElementInst, UndefValue>(V) ||
areAllOperandsNonInsts(V);
})))
Res.second = FindLastInst();
else
Res.second = FindFirstInst();
return *Res.second;
}
// Find the last instruction. The common case should be that BB has been
// scheduled, and the last instruction is VL.back(). So we start with
// VL.back() and iterate over schedule data until we reach the end of the
// bundle. The end of the bundle is marked by null ScheduleData.
if (BlocksSchedules.count(BB)) {
Value *V = E->isOneOf(E->Scalars.back());
if (doesNotNeedToBeScheduled(V))
V = *find_if_not(E->Scalars, doesNotNeedToBeScheduled);
auto *Bundle = BlocksSchedules[BB]->getScheduleData(V);
if (Bundle && Bundle->isPartOfBundle())
for (; Bundle; Bundle = Bundle->NextInBundle)
if (Bundle->OpValue == Bundle->Inst)
Res.second = Bundle->Inst;
}
// LastInst can still be null at this point if there's either not an entry
// for BB in BlocksSchedules or there's no ScheduleData available for
// VL.back(). This can be the case if buildTree_rec aborts for various
// reasons (e.g., the maximum recursion depth is reached, the maximum region
// size is reached, etc.). ScheduleData is initialized in the scheduling
// "dry-run".
//
// If this happens, we can still find the last instruction by brute force. We
// iterate forwards from Front (inclusive) until we either see all
// instructions in the bundle or reach the end of the block. If Front is the
// last instruction in program order, LastInst will be set to Front, and we
// will visit all the remaining instructions in the block.
//
// One of the reasons we exit early from buildTree_rec is to place an upper
// bound on compile-time. Thus, taking an additional compile-time hit here is
// not ideal. However, this should be exceedingly rare since it requires that
// we both exit early from buildTree_rec and that the bundle be out-of-order
// (causing us to iterate all the way to the end of the block).
if (!Res.second)
Res.second = FindLastInst();
assert(Res.second && "Failed to find last instruction in bundle");
return *Res.second;
}
void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
auto *Front = E->getMainOp();
Instruction *LastInst = &getLastInstructionInBundle(E);
assert(LastInst && "Failed to find last instruction in bundle");
BasicBlock::iterator LastInstIt = LastInst->getIterator();
// If the instruction is PHI, set the insert point after all the PHIs.
bool IsPHI = isa<PHINode>(LastInst);
if (IsPHI)
LastInstIt = LastInst->getParent()->getFirstNonPHIIt();
if (IsPHI || (!E->isGather() && doesNotNeedToSchedule(E->Scalars))) {
Builder.SetInsertPoint(LastInst->getParent(), LastInstIt);
} else {
// Set the insertion point after the last instruction in the bundle. Set the
// debug location to Front.
Builder.SetInsertPoint(
LastInst->getParent(),
LastInst->getNextNonDebugInstruction()->getIterator());
}
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
// List of instructions/lanes from the current block and/or the blocks which
// are part of the current loop. These instructions will be inserted at the
// end to make it possible to optimize loops and hoist invariant instructions
// out of the loop's body with better chances for success.
SmallVector<std::pair<Value *, unsigned>, 4> PostponedInsts;
SmallSet<int, 4> PostponedIndices;
Loop *L = LI->getLoopFor(Builder.GetInsertBlock());
auto &&CheckPredecessor = [](BasicBlock *InstBB, BasicBlock *InsertBB) {
SmallPtrSet<BasicBlock *, 4> Visited;
while (InsertBB && InsertBB != InstBB && Visited.insert(InsertBB).second)
InsertBB = InsertBB->getSinglePredecessor();
return InsertBB && InsertBB == InstBB;
};
for (int I = 0, E = VL.size(); I < E; ++I) {
if (auto *Inst = dyn_cast<Instruction>(VL[I]))
if ((CheckPredecessor(Inst->getParent(), Builder.GetInsertBlock()) ||
getTreeEntry(Inst) ||
(L && (!Root || L->isLoopInvariant(Root)) && L->contains(Inst))) &&
PostponedIndices.insert(I).second)
PostponedInsts.emplace_back(Inst, I);
}
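// Creates an insertelement of scalar V into lane Pos, casting V to the
// required integer type first if needed, and records an external use if the
// scalar is also vectorized in the tree.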
auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos,
Type *Ty) {
Value *Scalar = V;
if (Scalar->getType() != Ty) {
assert(Scalar->getType()->isIntegerTy() && Ty->isIntegerTy() &&
"Expected integer types only.");
Value *V = Scalar;
if (auto *CI = dyn_cast<CastInst>(Scalar);
isa_and_nonnull<SExtInst, ZExtInst>(CI)) {
Value *Op = CI->getOperand(0);
if (auto *IOp = dyn_cast<Instruction>(Op);
!IOp || !(isDeleted(IOp) || getTreeEntry(IOp)))
V = Op;
}
Scalar = Builder.CreateIntCast(
V, Ty, !isKnownNonNegative(Scalar, SimplifyQuery(*DL)));
}
Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos));
auto *InsElt = dyn_cast<InsertElementInst>(Vec);
if (!InsElt)
return Vec;
GatherShuffleExtractSeq.insert(InsElt);
CSEBlocks.insert(InsElt->getParent());
// Add to our 'need-to-extract' list.
if (isa<Instruction>(V)) {
if (TreeEntry *Entry = getTreeEntry(V)) {
// Find which lane we need to extract.
User *UserOp = nullptr;
if (Scalar != V) {
if (auto *SI = dyn_cast<Instruction>(Scalar))
UserOp = SI;
} else {
UserOp = InsElt;
}
if (UserOp) {
unsigned FoundLane = Entry->findLaneForValue(V);
ExternalUses.emplace_back(V, UserOp, FoundLane);
}
}
}
return Vec;
};
auto *VecTy = getWidenedType(ScalarTy, VL.size());
Value *Vec = Root ? Root : PoisonValue::get(VecTy);
SmallVector<int> NonConsts;
// Insert constant values first.
for (int I = 0, E = VL.size(); I < E; ++I) {
if (PostponedIndices.contains(I))
continue;
if (!isConstant(VL[I])) {
NonConsts.push_back(I);
continue;
}
if (Root) {
if (!isa<UndefValue>(VL[I])) {
NonConsts.push_back(I);
continue;
}
if (isa<PoisonValue>(VL[I]))
continue;
if (auto *SV = dyn_cast<ShuffleVectorInst>(Root)) {
if (SV->getMaskValue(I) == PoisonMaskElem)
continue;
}
}
Vec = CreateInsertElement(Vec, VL[I], I, ScalarTy);
}
// Insert non-constant values.
for (int I : NonConsts)
Vec = CreateInsertElement(Vec, VL[I], I, ScalarTy);
// Append instructions which are/may be part of the loop at the end to make
// it possible to hoist non-loop-based instructions.
for (const std::pair<Value *, unsigned> &Pair : PostponedInsts)
Vec = CreateInsertElement(Vec, Pair.first, Pair.second, ScalarTy);
return Vec;
}
/// Merges shuffle masks and emits the final shuffle instruction, if required.
/// It supports shuffling of 2 input vectors. It implements lazy shuffle
/// emission: the actual shuffle instruction is generated only if it is
/// actually required. Otherwise, the shuffle instruction emission is delayed
/// till the end of the process, to reduce the number of emitted instructions
/// and further analysis/transformations.
/// The class will also look through the previously emitted shuffle
/// instructions and properly mark indices in the mask as undef.
/// For example, given the code
/// \code
/// %s1 = shufflevector <2 x ty> %0, poison, <1, 0>
/// %s2 = shufflevector <2 x ty> %1, poison, <1, 0>
/// \endcode
/// and if we need to emit a shuffle of %s1 and %s2 with mask <1, 0, 3, 2>, it will
/// look through %s1 and %s2 and emit
/// \code
/// %res = shufflevector <2 x ty> %0, %1, <0, 1, 2, 3>
/// \endcode
/// instead.
/// If the 2 operands are of different sizes, the smaller one will be resized
/// and the mask recalculated properly.
/// For example, given the code
/// \code
/// %s1 = shufflevector <2 x ty> %0, poison, <1, 0, 1, 0>
/// %s2 = shufflevector <2 x ty> %1, poison, <1, 0, 1, 0>
/// \endcode
/// and if we need to emit a shuffle of %s1 and %s2 with mask <1, 0, 5, 4>, it will
/// look through %s1 and %s2 and emit
/// \code
/// %res = shufflevector <2 x ty> %0, %1, <0, 1, 2, 3>
/// \endcode
/// instead.
class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
bool IsFinalized = false;
/// Combined mask for all applied operands and masks. It is built during
/// analysis and actual emission of shuffle vector instructions.
SmallVector<int> CommonMask;
/// List of operands for the shuffle vector instruction. It holds at most 2
/// operands; if a 3rd one is going to be added, the first 2 are combined into
/// a shuffle with the \p CommonMask mask, the first operand is set to the
/// resulting shuffle and the second operand is set to the newly added
/// operand. The \p CommonMask is transformed in the proper way after that.
SmallVector<Value *, 2> InVectors;
Type *ScalarTy = nullptr;
IRBuilderBase &Builder;
BoUpSLP &R;
class ShuffleIRBuilder {
IRBuilderBase &Builder;
/// Holds all of the instructions that we gathered.
SetVector<Instruction *> &GatherShuffleExtractSeq;
/// A list of blocks that we are going to CSE.
DenseSet<BasicBlock *> &CSEBlocks;
/// Data layout.
const DataLayout &DL;
public:
ShuffleIRBuilder(IRBuilderBase &Builder,
SetVector<Instruction *> &GatherShuffleExtractSeq,
DenseSet<BasicBlock *> &CSEBlocks, const DataLayout &DL)
: Builder(Builder), GatherShuffleExtractSeq(GatherShuffleExtractSeq),
CSEBlocks(CSEBlocks), DL(DL) {}
~ShuffleIRBuilder() = default;
/// Creates shufflevector for the 2 operands with the given mask.
Value *createShuffleVector(Value *V1, Value *V2, ArrayRef<int> Mask) {
if (V1->getType() != V2->getType()) {
assert(V1->getType()->isIntOrIntVectorTy() &&
V2->getType()->isIntOrIntVectorTy() &&
"Expected integer vector types only.");
if (V1->getType() != V2->getType()) {
if (cast<VectorType>(V2->getType())
->getElementType()
->getIntegerBitWidth() < cast<VectorType>(V1->getType())
->getElementType()
->getIntegerBitWidth())
V2 = Builder.CreateIntCast(
V2, V1->getType(), !isKnownNonNegative(V2, SimplifyQuery(DL)));
else
V1 = Builder.CreateIntCast(
V1, V2->getType(), !isKnownNonNegative(V1, SimplifyQuery(DL)));
}
}
Value *Vec = Builder.CreateShuffleVector(V1, V2, Mask);
if (auto *I = dyn_cast<Instruction>(Vec)) {
GatherShuffleExtractSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
return Vec;
}
/// Creates a permutation of the single vector operand with the given mask,
/// if it is not an identity mask.
Value *createShuffleVector(Value *V1, ArrayRef<int> Mask) {
if (Mask.empty())
return V1;
unsigned VF = Mask.size();
unsigned LocalVF = cast<FixedVectorType>(V1->getType())->getNumElements();
if (VF == LocalVF && ShuffleVectorInst::isIdentityMask(Mask, VF))
return V1;
Value *Vec = Builder.CreateShuffleVector(V1, Mask);
if (auto *I = dyn_cast<Instruction>(Vec)) {
GatherShuffleExtractSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
return Vec;
}
Value *createIdentity(Value *V) { return V; }
Value *createPoison(Type *Ty, unsigned VF) {
return PoisonValue::get(getWidenedType(Ty, VF));
}
/// Resizes 2 input vectors to match their sizes, if they are not equal
/// yet. The smaller vector is resized to the size of the larger vector.
void resizeToMatch(Value *&V1, Value *&V2) {
if (V1->getType() == V2->getType())
return;
int V1VF = cast<FixedVectorType>(V1->getType())->getNumElements();
int V2VF = cast<FixedVectorType>(V2->getType())->getNumElements();
int VF = std::max(V1VF, V2VF);
int MinVF = std::min(V1VF, V2VF);
SmallVector<int> IdentityMask(VF, PoisonMaskElem);
std::iota(IdentityMask.begin(), std::next(IdentityMask.begin(), MinVF),
0);
Value *&Op = MinVF == V1VF ? V1 : V2;
Op = Builder.CreateShuffleVector(Op, IdentityMask);
if (auto *I = dyn_cast<Instruction>(Op)) {
GatherShuffleExtractSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
if (MinVF == V1VF)
V1 = Op;
else
V2 = Op;
}
};
/// Smart shuffle instruction emission: walks through shuffle trees and
/// tries to find the best matching vector for the actual shuffle
/// instruction.
Value *createShuffle(Value *V1, Value *V2, ArrayRef<int> Mask) {
assert(V1 && "Expected at least one vector value.");
ShuffleIRBuilder ShuffleBuilder(Builder, R.GatherShuffleExtractSeq,
R.CSEBlocks, *R.DL);
return BaseShuffleAnalysis::createShuffle<Value *>(V1, V2, Mask,
ShuffleBuilder);
}
/// Transforms mask \p CommonMask according to the given \p Mask to form the
/// proper mask after shuffle emission.
static void transformMaskAfterShuffle(MutableArrayRef<int> CommonMask,
ArrayRef<int> Mask) {
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (Mask[Idx] != PoisonMaskElem)
CommonMask[Idx] = Idx;
}
/// Cast value \p V to the vector type with the same number of elements, but
/// the base type \p ScalarTy.
Value *castToScalarTyElem(Value *V,
std::optional<bool> IsSigned = std::nullopt) {
auto *VecTy = cast<VectorType>(V->getType());
assert(getNumElements(VecTy) % getNumElements(ScalarTy) == 0);
if (VecTy->getElementType() == ScalarTy->getScalarType())
return V;
return Builder.CreateIntCast(
V, VectorType::get(ScalarTy->getScalarType(), VecTy->getElementCount()),
IsSigned.value_or(!isKnownNonNegative(V, SimplifyQuery(*R.DL))));
}
public:
ShuffleInstructionBuilder(Type *ScalarTy, IRBuilderBase &Builder, BoUpSLP &R)
: ScalarTy(ScalarTy), Builder(Builder), R(R) {}
/// Adjusts extractelements after reusing them.
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
unsigned NumParts, bool &UseVecBaseAsInput) {
UseVecBaseAsInput = false;
SmallPtrSet<Value *, 4> UniqueBases;
Value *VecBase = nullptr;
for (int I = 0, Sz = Mask.size(); I < Sz; ++I) {
int Idx = Mask[I];
if (Idx == PoisonMaskElem)
continue;
auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
VecBase = EI->getVectorOperand();
if (const TreeEntry *TE = R.getTreeEntry(VecBase))
VecBase = TE->VectorizedValue;
assert(VecBase && "Expected vectorized value.");
UniqueBases.insert(VecBase);
// If the only use is vectorized - the extractelement itself can be
// deleted.
if (!EI->hasOneUse() || (NumParts != 1 && count(E->Scalars, EI) > 1) ||
any_of(EI->users(), [&](User *U) {
const TreeEntry *UTE = R.getTreeEntry(U);
return !UTE || R.MultiNodeScalars.contains(U) ||
(isa<GetElementPtrInst>(U) &&
!R.areAllUsersVectorized(cast<Instruction>(U))) ||
count_if(R.VectorizableTree,
[&](const std::unique_ptr<TreeEntry> &TE) {
return any_of(TE->UserTreeIndices,
[&](const EdgeInfo &Edge) {
return Edge.UserTE == UTE;
}) &&
is_contained(TE->Scalars, EI);
}) != 1;
}))
continue;
R.eraseInstruction(EI);
}
if (NumParts == 1 || UniqueBases.size() == 1) {
assert(VecBase && "Expected vectorized value.");
return castToScalarTyElem(VecBase);
}
UseVecBaseAsInput = true;
auto TransformToIdentity = [](MutableArrayRef<int> Mask) {
for (auto [I, Idx] : enumerate(Mask))
if (Idx != PoisonMaskElem)
Idx = I;
};
// Perform a multi-register vector shuffle, joining the parts into a single
// virtual long vector.
// Need to shuffle each part independently and then insert all these parts
// into a long virtual vector register, forming the original vector.
Value *Vec = nullptr;
SmallVector<int> VecMask(Mask.size(), PoisonMaskElem);
unsigned SliceSize = getPartNumElems(E->Scalars.size(), NumParts);
for (unsigned Part : seq<unsigned>(NumParts)) {
unsigned Limit = getNumElems(E->Scalars.size(), SliceSize, Part);
ArrayRef<Value *> VL =
ArrayRef(E->Scalars).slice(Part * SliceSize, Limit);
MutableArrayRef<int> SubMask = Mask.slice(Part * SliceSize, Limit);
constexpr int MaxBases = 2;
SmallVector<Value *, MaxBases> Bases(MaxBases);
auto VLMask = zip(VL, SubMask);
const unsigned VF = std::accumulate(
VLMask.begin(), VLMask.end(), 0U, [&](unsigned S, const auto &D) {
if (std::get<1>(D) == PoisonMaskElem)
return S;
Value *VecOp =
cast<ExtractElementInst>(std::get<0>(D))->getVectorOperand();
if (const TreeEntry *TE = R.getTreeEntry(VecOp))
VecOp = TE->VectorizedValue;
assert(VecOp && "Expected vectorized value.");
const unsigned Size =
cast<FixedVectorType>(VecOp->getType())->getNumElements();
return std::max(S, Size);
});
for (const auto [V, I] : VLMask) {
if (I == PoisonMaskElem)
continue;
Value *VecOp = cast<ExtractElementInst>(V)->getVectorOperand();
if (const TreeEntry *TE = R.getTreeEntry(VecOp))
VecOp = TE->VectorizedValue;
assert(VecOp && "Expected vectorized value.");
VecOp = castToScalarTyElem(VecOp);
Bases[I / VF] = VecOp;
}
if (!Bases.front())
continue;
Value *SubVec;
if (Bases.back()) {
SubVec = createShuffle(Bases.front(), Bases.back(), SubMask);
TransformToIdentity(SubMask);
} else {
SubVec = Bases.front();
}
if (!Vec) {
Vec = SubVec;
assert((Part == 0 || all_of(seq<unsigned>(0, Part),
[&](unsigned P) {
ArrayRef<int> SubMask =
Mask.slice(P * SliceSize,
getNumElems(Mask.size(),
SliceSize, P));
return all_of(SubMask, [](int Idx) {
return Idx == PoisonMaskElem;
});
})) &&
"Expected first part or all previous parts masked.");
copy(SubMask, std::next(VecMask.begin(), Part * SliceSize));
} else {
unsigned NewVF =
cast<FixedVectorType>(Vec->getType())->getNumElements();
if (Vec->getType() != SubVec->getType()) {
unsigned SubVecVF =
cast<FixedVectorType>(SubVec->getType())->getNumElements();
NewVF = std::max(NewVF, SubVecVF);
}
// Adjust SubMask.
for (int &Idx : SubMask)
if (Idx != PoisonMaskElem)
Idx += NewVF;
copy(SubMask, std::next(VecMask.begin(), Part * SliceSize));
Vec = createShuffle(Vec, SubVec, VecMask);
TransformToIdentity(VecMask);
}
}
copy(VecMask, Mask.begin());
return Vec;
}
/// Checks if the specified entry \p E needs to be delayed because of its
/// dependency nodes.
std::optional<Value *>
needToDelay(const TreeEntry *E,
ArrayRef<SmallVector<const TreeEntry *>> Deps) const {
// No need to delay emission if all deps are ready.
if (all_of(Deps, [](ArrayRef<const TreeEntry *> TEs) {
return all_of(
TEs, [](const TreeEntry *TE) { return TE->VectorizedValue; });
}))
return std::nullopt;
// Postpone gather emission; it will be emitted after the end of the
// process to keep the correct order.
auto *ResVecTy = getWidenedType(ScalarTy, E->getVectorFactor());
return Builder.CreateAlignedLoad(
ResVecTy,
PoisonValue::get(PointerType::getUnqual(ScalarTy->getContext())),
MaybeAlign());
}
/// Adds 2 input vectors (in form of tree entries) and the mask for their
/// shuffling.
void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef<int> Mask) {
Value *V1 = E1.VectorizedValue;
if (V1->getType()->isIntOrIntVectorTy())
V1 = castToScalarTyElem(V1, any_of(E1.Scalars, [&](Value *V) {
return !isKnownNonNegative(
V, SimplifyQuery(*R.DL));
}));
Value *V2 = E2.VectorizedValue;
if (V2->getType()->isIntOrIntVectorTy())
V2 = castToScalarTyElem(V2, any_of(E2.Scalars, [&](Value *V) {
return !isKnownNonNegative(
V, SimplifyQuery(*R.DL));
}));
add(V1, V2, Mask);
}
/// Adds single input vector (in form of tree entry) and the mask for its
/// shuffling.
void add(const TreeEntry &E1, ArrayRef<int> Mask) {
Value *V1 = E1.VectorizedValue;
if (V1->getType()->isIntOrIntVectorTy())
V1 = castToScalarTyElem(V1, any_of(E1.Scalars, [&](Value *V) {
return !isKnownNonNegative(
V, SimplifyQuery(*R.DL));
}));
add(V1, Mask);
}
/// Adds 2 input vectors and the mask for their shuffling.
void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
assert(V1 && V2 && !Mask.empty() && "Expected non-empty input vectors.");
V1 = castToScalarTyElem(V1);
V2 = castToScalarTyElem(V2);
if (InVectors.empty()) {
InVectors.push_back(V1);
InVectors.push_back(V2);
CommonMask.assign(Mask.begin(), Mask.end());
return;
}
Value *Vec = InVectors.front();
if (InVectors.size() == 2) {
Vec = createShuffle(Vec, InVectors.back(), CommonMask);
transformMaskAfterShuffle(CommonMask, CommonMask);
} else if (cast<FixedVectorType>(Vec->getType())->getNumElements() !=
Mask.size()) {
Vec = createShuffle(Vec, nullptr, CommonMask);
transformMaskAfterShuffle(CommonMask, CommonMask);
}
V1 = createShuffle(V1, V2, Mask);
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (Mask[Idx] != PoisonMaskElem)
CommonMask[Idx] = Idx + Sz;
InVectors.front() = Vec;
if (InVectors.size() == 2)
InVectors.back() = V1;
else
InVectors.push_back(V1);
}
/// Adds one more input vector and the mask for its shuffling.
void add(Value *V1, ArrayRef<int> Mask, bool = false) {
V1 = castToScalarTyElem(V1);
if (InVectors.empty()) {
if (!isa<FixedVectorType>(V1->getType())) {
V1 = createShuffle(V1, nullptr, CommonMask);
CommonMask.assign(Mask.size(), PoisonMaskElem);
transformMaskAfterShuffle(CommonMask, Mask);
}
InVectors.push_back(V1);
CommonMask.assign(Mask.begin(), Mask.end());
return;
}
const auto *It = find(InVectors, V1);
if (It == InVectors.end()) {
if (InVectors.size() == 2 ||
InVectors.front()->getType() != V1->getType() ||
!isa<FixedVectorType>(V1->getType())) {
Value *V = InVectors.front();
if (InVectors.size() == 2) {
V = createShuffle(InVectors.front(), InVectors.back(), CommonMask);
transformMaskAfterShuffle(CommonMask, CommonMask);
} else if (cast<FixedVectorType>(V->getType())->getNumElements() !=
CommonMask.size()) {
V = createShuffle(InVectors.front(), nullptr, CommonMask);
transformMaskAfterShuffle(CommonMask, CommonMask);
}
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (CommonMask[Idx] == PoisonMaskElem && Mask[Idx] != PoisonMaskElem)
CommonMask[Idx] =
V->getType() != V1->getType()
? Idx + Sz
: Mask[Idx] + cast<FixedVectorType>(V1->getType())
->getNumElements();
if (V->getType() != V1->getType())
V1 = createShuffle(V1, nullptr, Mask);
InVectors.front() = V;
if (InVectors.size() == 2)
InVectors.back() = V1;
else
InVectors.push_back(V1);
return;
}
// Check if the second vector is actually required: it is only needed if
// some of the requested elements are not already provided by the first one.
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem) {
InVectors.push_back(V1);
break;
}
}
int VF = CommonMask.size();
if (auto *FTy = dyn_cast<FixedVectorType>(V1->getType()))
VF = FTy->getNumElements();
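// For equal-width inputs, shufflevector addresses the second source's lanes
// at indices [VF, 2 * VF); e.g., with VF == 4, lane 1 of the second input is
// encoded as mask index 5 below.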
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
CommonMask[Idx] = Mask[Idx] + (It == InVectors.begin() ? 0 : VF);
}
/// Adds one more input vector and the mask for its shuffling.
void addOrdered(Value *V1, ArrayRef<unsigned> Order) {
SmallVector<int> NewMask;
inversePermutation(Order, NewMask);
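// inversePermutation sets NewMask[Order[I]] = I; e.g., Order = {2, 0, 1}
// yields NewMask = {1, 2, 0}.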
add(V1, NewMask);
}
Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
Value *Root = nullptr) {
return R.gather(VL, Root, ScalarTy);
}
Value *createFreeze(Value *V) { return Builder.CreateFreeze(V); }
/// Finalize emission of the shuffles.
/// \param Action the action (if any) to be performed before the final
/// application of the \p ExtMask mask.
Value *
finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
IsFinalized = true;
if (Action) {
Value *Vec = InVectors.front();
if (InVectors.size() == 2) {
Vec = createShuffle(Vec, InVectors.back(), CommonMask);
InVectors.pop_back();
} else {
Vec = createShuffle(Vec, nullptr, CommonMask);
}
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
if (CommonMask[Idx] != PoisonMaskElem)
CommonMask[Idx] = Idx;
assert(VF > 0 &&
"Expected vector length for the final value before action.");
unsigned VecVF = cast<FixedVectorType>(Vec->getType())->getNumElements();
if (VecVF < VF) {
SmallVector<int> ResizeMask(VF, PoisonMaskElem);
std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), VecVF), 0);
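// E.g., VecVF == 4 and VF == 8 give ResizeMask = {0, 1, 2, 3, -1, -1, -1,
// -1} (PoisonMaskElem as -1), padding the vector with poison lanes up to VF.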
Vec = createShuffle(Vec, nullptr, ResizeMask);
}
Action(Vec, CommonMask);
InVectors.front() = Vec;
}
if (!ExtMask.empty()) {
if (CommonMask.empty()) {
CommonMask.assign(ExtMask.begin(), ExtMask.end());
} else {
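// Compose the two masks: NewMask[I] = CommonMask[ExtMask[I]]. E.g.,
// CommonMask = {3, 2, 1, 0} and ExtMask = {1, 3} give NewMask = {2, 0}.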
SmallVector<int> NewMask(ExtMask.size(), PoisonMaskElem);
for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) {
if (ExtMask[I] == PoisonMaskElem)
continue;
NewMask[I] = CommonMask[ExtMask[I]];
}
CommonMask.swap(NewMask);
}
}
if (CommonMask.empty()) {
assert(InVectors.size() == 1 && "Expected only one vector with no mask");
return InVectors.front();
}
if (InVectors.size() == 2)
return createShuffle(InVectors.front(), InVectors.back(), CommonMask);
return createShuffle(InVectors.front(), nullptr, CommonMask);
}
~ShuffleInstructionBuilder() {
assert((IsFinalized || CommonMask.empty()) &&
"Shuffle construction must be finalized.");
}
};
Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
bool PostponedPHIs) {
ValueList &VL = E->getOperand(NodeIdx);
const unsigned VF = VL.size();
InstructionsState S = getSameOpcode(VL, *TLI);
// Special processing for GEPs bundle, which may include non-gep values.
if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) {
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
if (It != VL.end())
S = getSameOpcode(*It, *TLI);
}
if (S.getOpcode()) {
auto CheckSameVE = [&](const TreeEntry *VE) {
return VE->isSame(VL) &&
(any_of(VE->UserTreeIndices,
[E, NodeIdx](const EdgeInfo &EI) {
return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
}) ||
any_of(VectorizableTree,
[E, NodeIdx, VE](const std::unique_ptr<TreeEntry> &TE) {
return TE->isOperandGatherNode({E, NodeIdx}) &&
VE->isSame(TE->Scalars);
}));
};
TreeEntry *VE = getTreeEntry(S.OpValue);
bool IsSameVE = VE && CheckSameVE(VE);
if (!IsSameVE) {
auto It = MultiNodeScalars.find(S.OpValue);
if (It != MultiNodeScalars.end()) {
auto *I = find_if(It->getSecond(), [&](const TreeEntry *TE) {
return TE != VE && CheckSameVE(TE);
});
if (I != It->getSecond().end()) {
VE = *I;
IsSameVE = true;
}
}
}
if (IsSameVE) {
auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
ShuffleInstructionBuilder ShuffleBuilder(
cast<VectorType>(V->getType())->getElementType(), Builder, *this);
ShuffleBuilder.add(V, Mask);
return ShuffleBuilder.finalize(std::nullopt);
};
Value *V = vectorizeTree(VE, PostponedPHIs);
if (VF * getNumElements(VL[0]->getType()) !=
cast<FixedVectorType>(V->getType())->getNumElements()) {
if (!VE->ReuseShuffleIndices.empty()) {
// Reshuffle to get only unique values.
// If some of the scalars are duplicated in the vectorization
// tree entry, we do not vectorize them but instead generate a
// mask for the reuses. But if there are several users of the
// same entry, they may have different vectorization factors.
// This is especially important for PHI nodes. In this case, we
// need to adapt the resulting instruction for the user
// vectorization factor and have to reshuffle it again to take
// only unique elements of the vector. Without this code the
// function would incorrectly return a reduced vector instruction with
// the same elements, not the unique ones.
// block:
// %phi = phi <2 x > { .., %entry} {%shuffle, %block}
// %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
// ... (use %2)
// %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
// br %block
SmallVector<int> Mask(VF, PoisonMaskElem);
for (auto [I, V] : enumerate(VL)) {
if (isa<PoisonValue>(V))
continue;
Mask[I] = VE->findLaneForValue(V);
}
V = FinalShuffle(V, Mask);
} else {
assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
"Expected vectorization factor less "
"than original vector size.");
SmallVector<int> UniformMask(VF, 0);
std::iota(UniformMask.begin(), UniformMask.end(), 0);
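// I.e., UniformMask = {0, 1, ..., VF - 1}: take the first VF lanes of the
// wider vector.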
V = FinalShuffle(V, UniformMask);
}
}
// Need to update the operand gather node if the operand is not a
// vectorized node itself but a buildvector/gather node that matches one
// of the vectorized nodes.
if (find_if(VE->UserTreeIndices, [&](const EdgeInfo &EI) {
return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
}) == VE->UserTreeIndices.end()) {
auto *It = find_if(
VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
return TE->isGather() &&
TE->UserTreeIndices.front().UserTE == E &&
TE->UserTreeIndices.front().EdgeIdx == NodeIdx;
});
assert(It != VectorizableTree.end() && "Expected gather node operand.");
(*It)->VectorizedValue = V;
}
return V;
}
}
// Find the corresponding gather entry and vectorize it.
// This allows for more accurate tree/graph transformations and checks the
// correctness of the transformations in many cases.
auto *I = find_if(VectorizableTree,
[E, NodeIdx](const std::unique_ptr<TreeEntry> &TE) {
return TE->isOperandGatherNode({E, NodeIdx});
});
assert(I != VectorizableTree.end() && "Gather node is not in the graph.");
assert(I->get()->UserTreeIndices.size() == 1 &&
"Expected only single user for the gather node.");
assert(I->get()->isSame(VL) && "Expected same list of scalars.");
return vectorizeTree(I->get(), PostponedPHIs);
}
template <typename BVTy, typename ResTy, typename... Args>
ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
Args &...Params) {
assert(E->isGather() && "Expected gather node.");
unsigned VF = E->getVectorFactor();
bool NeedFreeze = false;
SmallVector<int> ReuseShuffleIndices(E->ReuseShuffleIndices.begin(),
E->ReuseShuffleIndices.end());
SmallVector<Value *> GatheredScalars(E->Scalars.begin(), E->Scalars.end());
// Build a mask out of the reorder indices and reorder scalars per this
// mask.
SmallVector<int> ReorderMask;
inversePermutation(E->ReorderIndices, ReorderMask);
if (!ReorderMask.empty())
reorderScalars(GatheredScalars, ReorderMask);
auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF,
unsigned I, unsigned SliceSize) {
if (!isSplat(E->Scalars) || none_of(E->Scalars, [](Value *V) {
return isa<UndefValue>(V) && !isa<PoisonValue>(V);
}))
return false;
TreeEntry *UserTE = E->UserTreeIndices.back().UserTE;
unsigned EdgeIdx = E->UserTreeIndices.back().EdgeIdx;
if (UserTE->getNumOperands() != 2)
return false;
auto *It =
find_if(VectorizableTree, [=](const std::unique_ptr<TreeEntry> &TE) {
return find_if(TE->UserTreeIndices, [=](const EdgeInfo &EI) {
return EI.UserTE == UserTE && EI.EdgeIdx != EdgeIdx;
}) != TE->UserTreeIndices.end();
});
if (It == VectorizableTree.end())
return false;
int Idx;
if ((Mask.size() < InputVF &&
ShuffleVectorInst::isExtractSubvectorMask(Mask, InputVF, Idx) &&
Idx == 0) ||
(Mask.size() == InputVF &&
ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))) {
std::iota(
std::next(Mask.begin(), I * SliceSize),
std::next(Mask.begin(),
I * SliceSize + getNumElems(Mask.size(), SliceSize, I)),
0);
} else {
unsigned IVal =
*find_if_not(Mask, [](int Idx) { return Idx == PoisonMaskElem; });
std::fill(
std::next(Mask.begin(), I * SliceSize),
std::next(Mask.begin(),
I * SliceSize + getNumElems(Mask.size(), SliceSize, I)),
IVal);
}
return true;
};
BVTy ShuffleBuilder(ScalarTy, Params...);
ResTy Res = ResTy();
SmallVector<int> Mask;
SmallVector<int> ExtractMask(GatheredScalars.size(), PoisonMaskElem);
SmallVector<std::optional<TTI::ShuffleKind>> ExtractShuffles;
Value *ExtractVecBase = nullptr;
bool UseVecBaseAsInput = false;
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles;
SmallVector<SmallVector<const TreeEntry *>> Entries;
Type *OrigScalarTy = GatheredScalars.front()->getType();
auto *VecTy = getWidenedType(ScalarTy, GatheredScalars.size());
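// getNumberOfParts returns the number of registers the vector occupies on
// the target; e.g., a <16 x i32> on a target with 128-bit vectors splits
// into 4 parts.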
unsigned NumParts = TTI->getNumberOfParts(VecTy);
if (NumParts == 0 || NumParts >= GatheredScalars.size())
NumParts = 1;
if (!all_of(GatheredScalars, IsaPred<UndefValue>)) {
// Check for gathered extracts.
bool Resized = false;
ExtractShuffles =
tryToGatherExtractElements(GatheredScalars, ExtractMask, NumParts);
if (!ExtractShuffles.empty()) {
SmallVector<const TreeEntry *> ExtractEntries;
for (auto [Idx, I] : enumerate(ExtractMask)) {
if (I == PoisonMaskElem)
continue;
if (const auto *TE = getTreeEntry(
cast<ExtractElementInst>(E->Scalars[Idx])->getVectorOperand()))
ExtractEntries.push_back(TE);
}
if (std::optional<ResTy> Delayed =
ShuffleBuilder.needToDelay(E, ExtractEntries)) {
// Delay emission of gathers which are not ready yet.
PostponedGathers.insert(E);
// Postpone the gather emission; it will be emitted after the end of the
// process to keep the correct order.
return *Delayed;
}
if (Value *VecBase = ShuffleBuilder.adjustExtracts(
E, ExtractMask, ExtractShuffles, NumParts, UseVecBaseAsInput)) {
ExtractVecBase = VecBase;
if (auto *VecBaseTy = dyn_cast<FixedVectorType>(VecBase->getType()))
if (VF == VecBaseTy->getNumElements() &&
GatheredScalars.size() != VF) {
Resized = true;
GatheredScalars.append(VF - GatheredScalars.size(),
PoisonValue::get(OrigScalarTy));
}
}
}
// Gather the extracts only after we check for fully matched gathers.
if (!ExtractShuffles.empty() || E->getOpcode() != Instruction::Load ||
E->isAltShuffle() ||
all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) ||
isSplat(E->Scalars) ||
(E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) {
GatherShuffles =
isGatherShuffledEntry(E, GatheredScalars, Mask, Entries, NumParts);
}
if (!GatherShuffles.empty()) {
if (std::optional<ResTy> Delayed =
ShuffleBuilder.needToDelay(E, Entries)) {
// Delay emission of gathers which are not ready yet.
PostponedGathers.insert(E);
// Postpone the gather emission; it will be emitted after the end of the
// process to keep the correct order.
return *Delayed;
}
if (GatherShuffles.size() == 1 &&
*GatherShuffles.front() == TTI::SK_PermuteSingleSrc &&
Entries.front().front()->isSame(E->Scalars)) {
// Perfect match in the graph, will reuse the previously vectorized
// node. Cost is 0.
LLVM_DEBUG(
dbgs()
<< "SLP: perfect diamond match for gather bundle "
<< shortBundleName(E->Scalars) << ".\n");
// Restore the mask for previous partially matched values.
Mask.resize(E->Scalars.size());
const TreeEntry *FrontTE = Entries.front().front();
if (FrontTE->ReorderIndices.empty() &&
((FrontTE->ReuseShuffleIndices.empty() &&
E->Scalars.size() == FrontTE->Scalars.size()) ||
(E->Scalars.size() == FrontTE->ReuseShuffleIndices.size()))) {
std::iota(Mask.begin(), Mask.end(), 0);
} else {
for (auto [I, V] : enumerate(E->Scalars)) {
if (isa<PoisonValue>(V)) {
Mask[I] = PoisonMaskElem;
continue;
}
Mask[I] = FrontTE->findLaneForValue(V);
}
}
ShuffleBuilder.add(*FrontTE, Mask);
Res = ShuffleBuilder.finalize(E->getCommonMask());
return Res;
}
if (!Resized) {
if (GatheredScalars.size() != VF &&
any_of(Entries, [&](ArrayRef<const TreeEntry *> TEs) {
return any_of(TEs, [&](const TreeEntry *TE) {
return TE->getVectorFactor() == VF;
});
}))
GatheredScalars.append(VF - GatheredScalars.size(),
PoisonValue::get(OrigScalarTy));
}
// Remove shuffled elements from list of gathers.
for (int I = 0, Sz = Mask.size(); I < Sz; ++I) {
if (Mask[I] != PoisonMaskElem)
GatheredScalars[I] = PoisonValue::get(OrigScalarTy);
}
}
}
auto TryPackScalars = [&](SmallVectorImpl<Value *> &Scalars,
SmallVectorImpl<int> &ReuseMask,
bool IsRootPoison) {
// For splats we can emit broadcasts instead of gathers, so try to find
// such sequences.
bool IsSplat = IsRootPoison && isSplat(Scalars) &&
(Scalars.size() > 2 || Scalars.front() == Scalars.back());
Scalars.append(VF - Scalars.size(), PoisonValue::get(OrigScalarTy));
SmallVector<int> UndefPos;
DenseMap<Value *, unsigned> UniquePositions;
// Gather unique non-const values and all constant values.
// For repeated values, just shuffle them.
int NumNonConsts = 0;
int SinglePos = 0;
for (auto [I, V] : enumerate(Scalars)) {
if (isa<UndefValue>(V)) {
if (!isa<PoisonValue>(V)) {
ReuseMask[I] = I;
UndefPos.push_back(I);
}
continue;
}
if (isConstant(V)) {
ReuseMask[I] = I;
continue;
}
++NumNonConsts;
SinglePos = I;
Value *OrigV = V;
Scalars[I] = PoisonValue::get(OrigScalarTy);
if (IsSplat) {
Scalars.front() = OrigV;
ReuseMask[I] = 0;
} else {
const auto Res = UniquePositions.try_emplace(OrigV, I);
Scalars[Res.first->second] = OrigV;
ReuseMask[I] = Res.first->second;
}
}
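// E.g., for a splat {%x, %x, %x, %x} the loop above leaves Scalars =
// {%x, poison, poison, poison} and ReuseMask = {0, 0, 0, 0}, which lowers
// to a single broadcast shuffle.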
if (NumNonConsts == 1) {
// Restore single insert element.
if (IsSplat) {
ReuseMask.assign(VF, PoisonMaskElem);
std::swap(Scalars.front(), Scalars[SinglePos]);
if (!UndefPos.empty() && UndefPos.front() == 0)
Scalars.front() = UndefValue::get(OrigScalarTy);
}
ReuseMask[SinglePos] = SinglePos;
} else if (!UndefPos.empty() && IsSplat) {
// For undef values, try to replace them with a simple broadcast.
// We can do this if the broadcast value is guaranteed to be
// non-poisonous, or by freezing the incoming scalar value first.
auto *It = find_if(Scalars, [this, E](Value *V) {
return !isa<UndefValue>(V) &&
(getTreeEntry(V) || isGuaranteedNotToBePoison(V) ||
(E->UserTreeIndices.size() == 1 &&
any_of(V->uses(), [E](const Use &U) {
// Check if the value is already used in the same operation in
// one of the nodes.
return E->UserTreeIndices.front().EdgeIdx !=
U.getOperandNo() &&
is_contained(
E->UserTreeIndices.front().UserTE->Scalars,
U.getUser());
})));
});
if (It != Scalars.end()) {
// Replace undefs by the non-poisoned scalars and emit broadcast.
int Pos = std::distance(Scalars.begin(), It);
for (int I : UndefPos) {
// Set the undef position to the non-poisoned scalar.
ReuseMask[I] = Pos;
// Replace the undef with poison; in the mask it has already been
// replaced by the non-poisoned scalar.
if (I != Pos)
Scalars[I] = PoisonValue::get(OrigScalarTy);
}
} else {
// Replace undefs with poison, emit the broadcast and then emit a
// freeze.
for (int I : UndefPos) {
ReuseMask[I] = PoisonMaskElem;
if (isa<UndefValue>(Scalars[I]))
Scalars[I] = PoisonValue::get(OrigScalarTy);
}
NeedFreeze = true;
}
}
};
if (!ExtractShuffles.empty() || !GatherShuffles.empty()) {
bool IsNonPoisoned = true;
bool IsUsedInExpr = true;
Value *Vec1 = nullptr;
if (!ExtractShuffles.empty()) {
// A gather of extractelements can be represented as just a shuffle of
// one or two vectors from which the scalars are extracted.
// Find input vectors.
Value *Vec2 = nullptr;
for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
if (!Mask.empty() && Mask[I] != PoisonMaskElem)
ExtractMask[I] = PoisonMaskElem;
}
if (UseVecBaseAsInput) {
Vec1 = ExtractVecBase;
} else {
for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
if (ExtractMask[I] == PoisonMaskElem)
continue;
if (isa<UndefValue>(E->Scalars[I]))
continue;
auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
Value *VecOp = EI->getVectorOperand();
if (const auto *TE = getTreeEntry(VecOp))
if (TE->VectorizedValue)
VecOp = TE->VectorizedValue;
if (!Vec1) {
Vec1 = VecOp;
} else if (Vec1 != VecOp) {
assert((!Vec2 || Vec2 == VecOp) &&
"Expected only 1 or 2 vectors shuffle.");
Vec2 = VecOp;
}
}
}
if (Vec2) {
IsUsedInExpr = false;
IsNonPoisoned &=
isGuaranteedNotToBePoison(Vec1) && isGuaranteedNotToBePoison(Vec2);
ShuffleBuilder.add(Vec1, Vec2, ExtractMask);
} else if (Vec1) {
IsUsedInExpr &= FindReusedSplat(
ExtractMask,
cast<FixedVectorType>(Vec1->getType())->getNumElements(), 0,
ExtractMask.size());
ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true);
IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
} else {
IsUsedInExpr = false;
ShuffleBuilder.add(PoisonValue::get(VecTy), ExtractMask,
/*ForExtracts=*/true);
}
}
if (!GatherShuffles.empty()) {
unsigned SliceSize = getPartNumElems(E->Scalars.size(), NumParts);
SmallVector<int> VecMask(Mask.size(), PoisonMaskElem);
for (const auto [I, TEs] : enumerate(Entries)) {
if (TEs.empty()) {
assert(!GatherShuffles[I] &&
"No shuffles with empty entries list expected.");
continue;
}
assert((TEs.size() == 1 || TEs.size() == 2) &&
"Expected shuffle of 1 or 2 entries.");
unsigned Limit = getNumElems(Mask.size(), SliceSize, I);
auto SubMask = ArrayRef(Mask).slice(I * SliceSize, Limit);
VecMask.assign(VecMask.size(), PoisonMaskElem);
copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
if (TEs.size() == 1) {
IsUsedInExpr &= FindReusedSplat(
VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
ShuffleBuilder.add(*TEs.front(), VecMask);
if (TEs.front()->VectorizedValue)
IsNonPoisoned &=
isGuaranteedNotToBePoison(TEs.front()->VectorizedValue);
} else {
IsUsedInExpr = false;
ShuffleBuilder.add(*TEs.front(), *TEs.back(), VecMask);
if (TEs.front()->VectorizedValue && TEs.back()->VectorizedValue)
IsNonPoisoned &=
isGuaranteedNotToBePoison(TEs.front()->VectorizedValue) &&
isGuaranteedNotToBePoison(TEs.back()->VectorizedValue);
}
}
}
// Try to figure out the best way to combine values: build a shuffle and
// insert elements, or just build several shuffles.
// Insert non-constant scalars.
SmallVector<Value *> NonConstants(GatheredScalars);
int EMSz = ExtractMask.size();
int MSz = Mask.size();
// Try to build a constant vector and shuffle with it only if we currently
// have a single permutation and more than one scalar constant.
bool IsSingleShuffle = ExtractShuffles.empty() || GatherShuffles.empty();
bool IsIdentityShuffle =
((UseVecBaseAsInput ||
all_of(ExtractShuffles,
[](const std::optional<TTI::ShuffleKind> &SK) {
return SK.value_or(TTI::SK_PermuteTwoSrc) ==
TTI::SK_PermuteSingleSrc;
})) &&
none_of(ExtractMask, [&](int I) { return I >= EMSz; }) &&
ShuffleVectorInst::isIdentityMask(ExtractMask, EMSz)) ||
(!GatherShuffles.empty() &&
all_of(GatherShuffles,
[](const std::optional<TTI::ShuffleKind> &SK) {
return SK.value_or(TTI::SK_PermuteTwoSrc) ==
TTI::SK_PermuteSingleSrc;
}) &&
none_of(Mask, [&](int I) { return I >= MSz; }) &&
ShuffleVectorInst::isIdentityMask(Mask, MSz));
bool EnoughConstsForShuffle =
IsSingleShuffle &&
(none_of(GatheredScalars,
[](Value *V) {
return isa<UndefValue>(V) && !isa<PoisonValue>(V);
}) ||
any_of(GatheredScalars,
[](Value *V) {
return isa<Constant>(V) && !isa<UndefValue>(V);
})) &&
(!IsIdentityShuffle ||
(GatheredScalars.size() == 2 &&
any_of(GatheredScalars,
[](Value *V) { return !isa<UndefValue>(V); })) ||
count_if(GatheredScalars, [](Value *V) {
return isa<Constant>(V) && !isa<PoisonValue>(V);
}) > 1);
// The NonConstants array contains just the non-constant values;
// GatheredScalars contains only the constants used to build the final
// vector, which is then shuffled.
for (int I = 0, Sz = GatheredScalars.size(); I < Sz; ++I) {
if (EnoughConstsForShuffle && isa<Constant>(GatheredScalars[I]))
NonConstants[I] = PoisonValue::get(OrigScalarTy);
else
GatheredScalars[I] = PoisonValue::get(OrigScalarTy);
}
// Generate constants for final shuffle and build a mask for them.
if (!all_of(GatheredScalars, IsaPred<PoisonValue>)) {
SmallVector<int> BVMask(GatheredScalars.size(), PoisonMaskElem);
TryPackScalars(GatheredScalars, BVMask, /*IsRootPoison=*/true);
Value *BV = ShuffleBuilder.gather(GatheredScalars, BVMask.size());
ShuffleBuilder.add(BV, BVMask);
}
if (all_of(NonConstants, [=](Value *V) {
return isa<PoisonValue>(V) ||
(IsSingleShuffle && ((IsIdentityShuffle &&
IsNonPoisoned) || IsUsedInExpr) && isa<UndefValue>(V));
}))
Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
else
Res = ShuffleBuilder.finalize(
E->ReuseShuffleIndices, E->Scalars.size(),
[&](Value *&Vec, SmallVectorImpl<int> &Mask) {
TryPackScalars(NonConstants, Mask, /*IsRootPoison=*/false);
Vec = ShuffleBuilder.gather(NonConstants, Mask.size(), Vec);
});
} else if (!allConstant(GatheredScalars)) {
// Gather unique scalars and all constants.
SmallVector<int> ReuseMask(GatheredScalars.size(), PoisonMaskElem);
TryPackScalars(GatheredScalars, ReuseMask, /*IsRootPoison=*/true);
Value *BV = ShuffleBuilder.gather(GatheredScalars, ReuseMask.size());
ShuffleBuilder.add(BV, ReuseMask);
Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
} else {
// Gather all constants.
SmallVector<int> Mask(E->Scalars.size(), PoisonMaskElem);
for (auto [I, V] : enumerate(E->Scalars)) {
if (!isa<PoisonValue>(V))
Mask[I] = I;
}
Value *BV = ShuffleBuilder.gather(E->Scalars);
ShuffleBuilder.add(BV, Mask);
Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
}
if (NeedFreeze)
Res = ShuffleBuilder.createFreeze(Res);
return Res;
}
Value *BoUpSLP::createBuildVector(const TreeEntry *E, Type *ScalarTy) {
return processBuildVector<ShuffleInstructionBuilder, Value *>(E, ScalarTy,
Builder, *this);
}
Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
IRBuilderBase::InsertPointGuard Guard(Builder);
if (E->VectorizedValue &&
(E->State != TreeEntry::Vectorize || E->getOpcode() != Instruction::PHI ||
E->isAltShuffle())) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
return E->VectorizedValue;
}
Value *V = E->Scalars.front();
Type *ScalarTy = V->getType();
if (auto *Store = dyn_cast<StoreInst>(V))
ScalarTy = Store->getValueOperand()->getType();
else if (auto *IE = dyn_cast<InsertElementInst>(V))
ScalarTy = IE->getOperand(1)->getType();
auto It = MinBWs.find(E);
if (It != MinBWs.end())
ScalarTy = IntegerType::get(F->getContext(), It->second.first);
auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size());
if (E->isGather()) {
// Set insert point for non-reduction initial nodes.
if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
setInsertPointAfterBundle(E);
Value *Vec = createBuildVector(E, ScalarTy);
E->VectorizedValue = Vec;
return Vec;
}
bool IsReverseOrder = isReverseOrder(E->ReorderIndices);
auto FinalShuffle = [&](Value *V, const TreeEntry *E, VectorType *VecTy) {
ShuffleInstructionBuilder ShuffleBuilder(ScalarTy, Builder, *this);
if (E->getOpcode() == Instruction::Store &&
E->State == TreeEntry::Vectorize) {
ArrayRef<int> Mask =
ArrayRef(reinterpret_cast<const int *>(E->ReorderIndices.begin()),
E->ReorderIndices.size());
ShuffleBuilder.add(V, Mask);
} else if (E->State == TreeEntry::StridedVectorize && IsReverseOrder) {
ShuffleBuilder.addOrdered(V, std::nullopt);
} else {
ShuffleBuilder.addOrdered(V, E->ReorderIndices);
}
return ShuffleBuilder.finalize(E->ReuseShuffleIndices);
};
assert((E->State == TreeEntry::Vectorize ||
E->State == TreeEntry::ScatterVectorize ||
E->State == TreeEntry::StridedVectorize) &&
"Unhandled state");
unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
Instruction *VL0 = E->getMainOp();
auto GetOperandSignedness = [&](unsigned Idx) {
const TreeEntry *OpE = getOperandEntry(E, Idx);
bool IsSigned = false;
auto It = MinBWs.find(OpE);
if (It != MinBWs.end())
IsSigned = It->second.second;
else
IsSigned = any_of(OpE->Scalars, [&](Value *R) {
return !isKnownNonNegative(R, SimplifyQuery(*DL));
});
return IsSigned;
};
switch (ShuffleOrOp) {
case Instruction::PHI: {
assert((E->ReorderIndices.empty() || !E->ReuseShuffleIndices.empty() ||
E != VectorizableTree.front().get() ||
!E->UserTreeIndices.empty()) &&
"PHI reordering is free.");
if (PostponedPHIs && E->VectorizedValue)
return E->VectorizedValue;
auto *PH = cast<PHINode>(VL0);
Builder.SetInsertPoint(PH->getParent(),
PH->getParent()->getFirstNonPHIIt());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
if (PostponedPHIs || !E->VectorizedValue) {
PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
E->PHI = NewPhi;
Value *V = NewPhi;
// Adjust the insertion point once all PHIs have been generated.
Builder.SetInsertPoint(PH->getParent(),
PH->getParent()->getFirstInsertionPt());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
V = FinalShuffle(V, E, VecTy);
E->VectorizedValue = V;
if (PostponedPHIs)
return V;
}
PHINode *NewPhi = cast<PHINode>(E->PHI);
// If the phi node is fully emitted, exit.
if (NewPhi->getNumIncomingValues() != 0)
return NewPhi;
// PHINodes may have multiple entries from the same block. We want to
// visit every block once.
SmallPtrSet<BasicBlock *, 4> VisitedBBs;
for (unsigned I : seq<unsigned>(0, PH->getNumIncomingValues())) {
ValueList Operands;
BasicBlock *IBB = PH->getIncomingBlock(I);
// Stop emission if all incoming values are generated.
if (NewPhi->getNumIncomingValues() == PH->getNumIncomingValues()) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return NewPhi;
}
if (!VisitedBBs.insert(IBB).second) {
NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
continue;
}
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
Value *Vec = vectorizeOperand(E, I, /*PostponedPHIs=*/true);
if (VecTy != Vec->getType()) {
assert((It != MinBWs.end() || getOperandEntry(E, I)->isGather() ||
MinBWs.contains(getOperandEntry(E, I))) &&
"Expected item in MinBWs.");
Vec = Builder.CreateIntCast(Vec, VecTy, GetOperandSignedness(I));
}
NewPhi->addIncoming(Vec, IBB);
}
assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() &&
"Invalid number of incoming values");
return NewPhi;
}
case Instruction::ExtractElement: {
Value *V = E->getSingleOperand(0);
if (const TreeEntry *TE = getTreeEntry(V))
V = TE->VectorizedValue;
setInsertPointAfterBundle(E);
V = FinalShuffle(V, E, VecTy);
E->VectorizedValue = V;
return V;
}
case Instruction::ExtractValue: {
auto *LI = cast<LoadInst>(E->getSingleOperand(0));
Builder.SetInsertPoint(LI);
Value *Ptr = LI->getPointerOperand();
LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
Value *NewV = propagateMetadata(V, E->Scalars);
NewV = FinalShuffle(NewV, E, VecTy);
E->VectorizedValue = NewV;
return NewV;
}
case Instruction::InsertElement: {
assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique");
Builder.SetInsertPoint(cast<Instruction>(E->Scalars.back()));
Value *V = vectorizeOperand(E, 1, PostponedPHIs);
ArrayRef<Value *> Op = E->getOperand(1);
Type *ScalarTy = Op.front()->getType();
if (cast<VectorType>(V->getType())->getElementType() != ScalarTy) {
assert(ScalarTy->isIntegerTy() && "Expected item in MinBWs.");
std::pair<unsigned, bool> Res = MinBWs.lookup(getOperandEntry(E, 1));
assert(Res.first > 0 && "Expected item in MinBWs.");
V = Builder.CreateIntCast(
V,
getWidenedType(
ScalarTy,
cast<FixedVectorType>(V->getType())->getNumElements()),
Res.second);
}
// Create InsertVector shuffle if necessary
auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
return !is_contained(E->Scalars, cast<Instruction>(V)->getOperand(0));
}));
const unsigned NumElts =
cast<FixedVectorType>(FirstInsert->getType())->getNumElements();
const unsigned NumScalars = E->Scalars.size();
unsigned Offset = *getElementIndex(VL0);
assert(Offset < NumElts && "Failed to find vector index offset");
// Create shuffle to resize vector
SmallVector<int> Mask;
if (!E->ReorderIndices.empty()) {
inversePermutation(E->ReorderIndices, Mask);
Mask.append(NumElts - NumScalars, PoisonMaskElem);
} else {
Mask.assign(NumElts, PoisonMaskElem);
std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
}
// Build the insert mask and check whether it is an identity relative to
// the offset.
bool IsIdentity = true;
SmallVector<int> PrevMask(NumElts, PoisonMaskElem);
Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
Value *Scalar = E->Scalars[PrevMask[I]];
unsigned InsertIdx = *getElementIndex(Scalar);
IsIdentity &= InsertIdx - Offset == I;
Mask[InsertIdx - Offset] = I;
}
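// E.g., two scalars inserted at lanes 2 and 3 of a <4 x T> vector with
// Offset == 2 give Mask[0] = 0 and Mask[1] = 1, so IsIdentity stays true.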
if (!IsIdentity || NumElts != NumScalars) {
Value *V2 = nullptr;
bool IsVNonPoisonous = isGuaranteedNotToBePoison(V) && !isConstant(V);
SmallVector<int> InsertMask(Mask);
if (NumElts != NumScalars && Offset == 0) {
// Follow all insert element instructions from the current buildvector
// sequence.
InsertElementInst *Ins = cast<InsertElementInst>(VL0);
do {
std::optional<unsigned> InsertIdx = getElementIndex(Ins);
if (!InsertIdx)
break;
if (InsertMask[*InsertIdx] == PoisonMaskElem)
InsertMask[*InsertIdx] = *InsertIdx;
if (!Ins->hasOneUse())
break;
Ins = dyn_cast_or_null<InsertElementInst>(
Ins->getUniqueUndroppableUser());
} while (Ins);
SmallBitVector UseMask =
buildUseMask(NumElts, InsertMask, UseMask::UndefsAsMask);
SmallBitVector IsFirstPoison =
isUndefVector<true>(FirstInsert->getOperand(0), UseMask);
SmallBitVector IsFirstUndef =
isUndefVector(FirstInsert->getOperand(0), UseMask);
if (!IsFirstPoison.all()) {
unsigned Idx = 0;
for (unsigned I = 0; I < NumElts; I++) {
if (InsertMask[I] == PoisonMaskElem && !IsFirstPoison.test(I) &&
IsFirstUndef.test(I)) {
if (IsVNonPoisonous) {
InsertMask[I] = I < NumScalars ? I : 0;
continue;
}
if (!V2)
V2 = UndefValue::get(V->getType());
if (Idx >= NumScalars)
Idx = NumScalars - 1;
InsertMask[I] = NumScalars + Idx;
++Idx;
} else if (InsertMask[I] != PoisonMaskElem &&
Mask[I] == PoisonMaskElem) {
InsertMask[I] = PoisonMaskElem;
}
}
} else {
InsertMask = Mask;
}
}
if (!V2)
V2 = PoisonValue::get(V->getType());
V = Builder.CreateShuffleVector(V, V2, InsertMask);
if (auto *I = dyn_cast<Instruction>(V)) {
GatherShuffleExtractSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
}
SmallVector<int> InsertMask(NumElts, PoisonMaskElem);
for (unsigned I = 0; I < NumElts; I++) {
if (Mask[I] != PoisonMaskElem)
InsertMask[Offset + I] = I;
}
SmallBitVector UseMask =
buildUseMask(NumElts, InsertMask, UseMask::UndefsAsMask);
SmallBitVector IsFirstUndef =
isUndefVector(FirstInsert->getOperand(0), UseMask);
if ((!IsIdentity || Offset != 0 || !IsFirstUndef.all()) &&
NumElts != NumScalars) {
if (IsFirstUndef.all()) {
if (!ShuffleVectorInst::isIdentityMask(InsertMask, NumElts)) {
SmallBitVector IsFirstPoison =
isUndefVector<true>(FirstInsert->getOperand(0), UseMask);
if (!IsFirstPoison.all()) {
for (unsigned I = 0; I < NumElts; I++) {
if (InsertMask[I] == PoisonMaskElem && !IsFirstPoison.test(I))
InsertMask[I] = I + NumElts;
}
}
V = Builder.CreateShuffleVector(
V,
IsFirstPoison.all() ? PoisonValue::get(V->getType())
: FirstInsert->getOperand(0),
InsertMask, cast<Instruction>(E->Scalars.back())->getName());
if (auto *I = dyn_cast<Instruction>(V)) {
GatherShuffleExtractSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
}
} else {
SmallBitVector IsFirstPoison =
isUndefVector<true>(FirstInsert->getOperand(0), UseMask);
for (unsigned I = 0; I < NumElts; I++) {
if (InsertMask[I] == PoisonMaskElem)
InsertMask[I] = IsFirstPoison.test(I) ? PoisonMaskElem : I;
else
InsertMask[I] += NumElts;
}
V = Builder.CreateShuffleVector(
FirstInsert->getOperand(0), V, InsertMask,
cast<Instruction>(E->Scalars.back())->getName());
if (auto *I = dyn_cast<Instruction>(V)) {
GatherShuffleExtractSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
}
}
++NumVectorInstructions;
E->VectorizedValue = V;
return V;
}
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
setInsertPointAfterBundle(E);
Value *InVec = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
auto *CI = cast<CastInst>(VL0);
Instruction::CastOps VecOpcode = CI->getOpcode();
Type *SrcScalarTy = cast<VectorType>(InVec->getType())->getElementType();
auto SrcIt = MinBWs.find(getOperandEntry(E, 0));
if (!ScalarTy->isFloatingPointTy() && !SrcScalarTy->isFloatingPointTy() &&
(SrcIt != MinBWs.end() || It != MinBWs.end() ||
SrcScalarTy != CI->getOperand(0)->getType())) {
// Check if the values are candidates to demote.
unsigned SrcBWSz = DL->getTypeSizeInBits(SrcScalarTy);
if (SrcIt != MinBWs.end())
SrcBWSz = SrcIt->second.first;
unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
if (BWSz == SrcBWSz) {
VecOpcode = Instruction::BitCast;
} else if (BWSz < SrcBWSz) {
VecOpcode = Instruction::Trunc;
} else if (It != MinBWs.end()) {
assert(BWSz > SrcBWSz && "Invalid cast!");
VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
} else if (SrcIt != MinBWs.end()) {
assert(BWSz > SrcBWSz && "Invalid cast!");
VecOpcode =
SrcIt->second.second ? Instruction::SExt : Instruction::ZExt;
}
} else if (VecOpcode == Instruction::SIToFP && SrcIt != MinBWs.end() &&
!SrcIt->second.second) {
VecOpcode = Instruction::UIToFP;
}
Value *V = (VecOpcode != ShuffleOrOp && VecOpcode == Instruction::BitCast)
? InVec
: Builder.CreateCast(VecOpcode, InVec, VecTy);
V = FinalShuffle(V, E, VecTy);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::FCmp:
case Instruction::ICmp: {
setInsertPointAfterBundle(E);
Value *L = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
Value *R = vectorizeOperand(E, 1, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
if (L->getType() != R->getType()) {
assert((getOperandEntry(E, 0)->isGather() ||
getOperandEntry(E, 1)->isGather() ||
MinBWs.contains(getOperandEntry(E, 0)) ||
MinBWs.contains(getOperandEntry(E, 1))) &&
"Expected item in MinBWs.");
if (cast<VectorType>(L->getType())
->getElementType()
->getIntegerBitWidth() < cast<VectorType>(R->getType())
->getElementType()
->getIntegerBitWidth()) {
Type *CastTy = R->getType();
L = Builder.CreateIntCast(L, CastTy, GetOperandSignedness(0));
} else {
Type *CastTy = L->getType();
R = Builder.CreateIntCast(R, CastTy, GetOperandSignedness(1));
}
}
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V = Builder.CreateCmp(P0, L, R);
propagateIRFlags(V, E->Scalars, VL0);
// Do not cast for cmps.
VecTy = cast<FixedVectorType>(V->getType());
V = FinalShuffle(V, E, VecTy);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Select: {
setInsertPointAfterBundle(E);
Value *Cond = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
Value *True = vectorizeOperand(E, 1, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
Value *False = vectorizeOperand(E, 2, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
if (True->getType() != VecTy || False->getType() != VecTy) {
assert((It != MinBWs.end() || getOperandEntry(E, 1)->isGather() ||
getOperandEntry(E, 2)->isGather() ||
MinBWs.contains(getOperandEntry(E, 1)) ||
MinBWs.contains(getOperandEntry(E, 2))) &&
"Expected item in MinBWs.");
if (True->getType() != VecTy)
True = Builder.CreateIntCast(True, VecTy, GetOperandSignedness(1));
if (False->getType() != VecTy)
False = Builder.CreateIntCast(False, VecTy, GetOperandSignedness(2));
}
Value *V = Builder.CreateSelect(Cond, True, False);
V = FinalShuffle(V, E, VecTy);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::FNeg: {
setInsertPointAfterBundle(E);
Value *Op = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
Value *V = Builder.CreateUnOp(
static_cast<Instruction::UnaryOps>(E->getOpcode()), Op);
propagateIRFlags(V, E->Scalars, VL0);
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
V = FinalShuffle(V, E, VecTy);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
setInsertPointAfterBundle(E);
Value *LHS = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
Value *RHS = vectorizeOperand(E, 1, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
if (ShuffleOrOp == Instruction::And && It != MinBWs.end()) {
for (unsigned I : seq<unsigned>(0, E->getNumOperands())) {
ArrayRef<Value *> Ops = E->getOperand(I);
if (all_of(Ops, [&](Value *Op) {
auto *CI = dyn_cast<ConstantInt>(Op);
return CI && CI->getValue().countr_one() >= It->second.first;
})) {
V = FinalShuffle(I == 0 ? RHS : LHS, E, VecTy);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
}
}
if (LHS->getType() != VecTy || RHS->getType() != VecTy) {
assert((It != MinBWs.end() || getOperandEntry(E, 0)->isGather() ||
getOperandEntry(E, 1)->isGather() ||
MinBWs.contains(getOperandEntry(E, 0)) ||
MinBWs.contains(getOperandEntry(E, 1))) &&
"Expected item in MinBWs.");
if (LHS->getType() != VecTy)
LHS = Builder.CreateIntCast(LHS, VecTy, GetOperandSignedness(0));
if (RHS->getType() != VecTy)
RHS = Builder.CreateIntCast(RHS, VecTy, GetOperandSignedness(1));
}
Value *V = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
RHS);
propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
if (auto *I = dyn_cast<Instruction>(V)) {
V = propagateMetadata(I, E->Scalars);
// Drop nuw flags for abs(sub(commutative), true).
if (!MinBWs.contains(E) && ShuffleOrOp == Instruction::Sub &&
any_of(E->Scalars, [](Value *V) {
return isCommutative(cast<Instruction>(V));
}))
I->setHasNoUnsignedWrap(/*b=*/false);
}
V = FinalShuffle(V, E, VecTy);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Load: {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
setInsertPointAfterBundle(E);
LoadInst *LI = cast<LoadInst>(VL0);
Instruction *NewLI;
Value *PO = LI->getPointerOperand();
if (E->State == TreeEntry::Vectorize) {
NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign());
} else if (E->State == TreeEntry::StridedVectorize) {
Value *Ptr0 = cast<LoadInst>(E->Scalars.front())->getPointerOperand();
Value *PtrN = cast<LoadInst>(E->Scalars.back())->getPointerOperand();
PO = IsReverseOrder ? PtrN : Ptr0;
std::optional<int> Diff = getPointersDiff(
VL0->getType(), Ptr0, VL0->getType(), PtrN, *DL, *SE);
Type *StrideTy = DL->getIndexType(PO->getType());
Value *StrideVal;
if (Diff) {
int Stride = *Diff / (static_cast<int>(E->Scalars.size()) - 1);
StrideVal =
ConstantInt::get(StrideTy, (IsReverseOrder ? -1 : 1) * Stride *
DL->getTypeAllocSize(ScalarTy));
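// E.g., for 4 consecutive i32 loads, getPointersDiff returns 3 elements,
// so Stride = 3 / (4 - 1) = 1 and the byte stride passed to the intrinsic
// is 1 * sizeof(i32) = 4 (negated if the order is reversed).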
} else {
SmallVector<Value *> PointerOps(E->Scalars.size(), nullptr);
transform(E->Scalars, PointerOps.begin(), [](Value *V) {
return cast<LoadInst>(V)->getPointerOperand();
});
OrdersType Order;
std::optional<Value *> Stride =
calculateRtStride(PointerOps, ScalarTy, *DL, *SE, Order,
&*Builder.GetInsertPoint());
Value *NewStride =
Builder.CreateIntCast(*Stride, StrideTy, /*isSigned=*/true);
StrideVal = Builder.CreateMul(
NewStride,
ConstantInt::get(
StrideTy,
(IsReverseOrder ? -1 : 1) *
static_cast<int>(DL->getTypeAllocSize(ScalarTy))));
}
Align CommonAlignment = computeCommonAlignment<LoadInst>(E->Scalars);
auto *Inst = Builder.CreateIntrinsic(
Intrinsic::experimental_vp_strided_load,
{VecTy, PO->getType(), StrideTy},
{PO, StrideVal, Builder.getAllOnesMask(VecTy->getElementCount()),
Builder.getInt32(E->Scalars.size())});
Inst->addParamAttr(
/*ArgNo=*/0,
Attribute::getWithAlignment(Inst->getContext(), CommonAlignment));
NewLI = Inst;
} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
Value *VecPtr = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
// Use the minimum alignment of the gathered loads.
Align CommonAlignment = computeCommonAlignment<LoadInst>(E->Scalars);
NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment);
}
Value *V = propagateMetadata(NewLI, E->Scalars);
V = FinalShuffle(V, E, VecTy);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Store: {
auto *SI = cast<StoreInst>(VL0);
setInsertPointAfterBundle(E);
Value *VecValue = vectorizeOperand(E, 0, PostponedPHIs);
if (VecValue->getType() != VecTy)
VecValue =
Builder.CreateIntCast(VecValue, VecTy, GetOperandSignedness(0));
VecValue = FinalShuffle(VecValue, E, VecTy);
Value *Ptr = SI->getPointerOperand();
Instruction *ST;
if (E->State == TreeEntry::Vectorize) {
ST = Builder.CreateAlignedStore(VecValue, Ptr, SI->getAlign());
} else {
assert(E->State == TreeEntry::StridedVectorize &&
"Expected either strided or conseutive stores.");
if (!E->ReorderIndices.empty()) {
SI = cast<StoreInst>(E->Scalars[E->ReorderIndices.front()]);
Ptr = SI->getPointerOperand();
}
Align CommonAlignment = computeCommonAlignment<StoreInst>(E->Scalars);
Type *StrideTy = DL->getIndexType(SI->getPointerOperandType());
auto *Inst = Builder.CreateIntrinsic(
Intrinsic::experimental_vp_strided_store,
{VecTy, Ptr->getType(), StrideTy},
{VecValue, Ptr,
ConstantInt::get(
StrideTy, -static_cast<int>(DL->getTypeAllocSize(ScalarTy))),
Builder.getAllOnesMask(VecTy->getElementCount()),
Builder.getInt32(E->Scalars.size())});
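// E.g., for i32 scalars the constant stride above is -4 bytes, so lane I
// is stored at Ptr + I * -4: lane 0 lands at the highest address and
// subsequent lanes walk backwards, matching the reversed store order.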
Inst->addParamAttr(
/*ArgNo=*/1,
Attribute::getWithAlignment(Inst->getContext(), CommonAlignment));
ST = Inst;
}
Value *V = propagateMetadata(ST, E->Scalars);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::GetElementPtr: {
auto *GEP0 = cast<GetElementPtrInst>(VL0);
setInsertPointAfterBundle(E);
Value *Op0 = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
SmallVector<Value *> OpVecs;
for (int J = 1, N = GEP0->getNumOperands(); J < N; ++J) {
Value *OpVec = vectorizeOperand(E, J, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
OpVecs.push_back(OpVec);
}
Value *V = Builder.CreateGEP(GEP0->getSourceElementType(), Op0, OpVecs);
if (Instruction *I = dyn_cast<GetElementPtrInst>(V)) {
SmallVector<Value *> GEPs;
for (Value *V : E->Scalars) {
if (isa<GetElementPtrInst>(V))
GEPs.push_back(V);
}
V = propagateMetadata(I, GEPs);
}
V = FinalShuffle(V, E, VecTy);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
SmallVector<Type *> ArgTys =
buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
It != MinBWs.end() ? It->second.first : 0);
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
VecCallCosts.first <= VecCallCosts.second;
Value *ScalarArg = nullptr;
SmallVector<Value *> OpVecs;
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
TysForDecl.push_back(VecTy);
auto *CEI = cast<CallInst>(VL0);
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
// vectorized.
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
ScalarArg = CEI->getArgOperand(I);
// If we decided to reduce the bitwidth of the abs intrinsic, its second
// argument must be set to false (do not return poison if the value is
// the signed minimum).
if (ID == Intrinsic::abs && It != MinBWs.end() &&
It->second.first < DL->getTypeSizeInBits(CEI->getType()))
ScalarArg = Builder.getFalse();
OpVecs.push_back(ScalarArg);
if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
TysForDecl.push_back(ScalarArg->getType());
continue;
}
Value *OpVec = vectorizeOperand(E, I, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
ScalarArg = CEI->getArgOperand(I);
if (cast<VectorType>(OpVec->getType())->getElementType() !=
ScalarArg->getType()->getScalarType() &&
It == MinBWs.end()) {
auto *CastTy =
getWidenedType(ScalarArg->getType(), VecTy->getNumElements());
OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I));
} else if (It != MinBWs.end()) {
OpVec = Builder.CreateIntCast(OpVec, VecTy, GetOperandSignedness(I));
}
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
TysForDecl.push_back(OpVec->getType());
}
Function *CF;
if (!UseIntrinsic) {
VFShape Shape =
VFShape::get(CI->getFunctionType(),
ElementCount::getFixed(
static_cast<unsigned>(VecTy->getNumElements())),
false /*HasGlobalPred*/);
CF = VFDatabase(*CI).getVectorizedFunction(Shape);
} else {
CF = Intrinsic::getDeclaration(F->getParent(), ID, TysForDecl);
}
SmallVector<OperandBundleDef, 1> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
Value *V = Builder.CreateCall(CF, OpVecs, OpBundles);
propagateIRFlags(V, E->Scalars, VL0);
V = FinalShuffle(V, E, VecTy);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::ShuffleVector: {
assert(E->isAltShuffle() &&
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
Instruction::isCast(E->getAltOpcode())) ||
(isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
"Invalid Shuffle Vector Operand");
Value *LHS = nullptr, *RHS = nullptr;
if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
setInsertPointAfterBundle(E);
LHS = vectorizeOperand(E, 0, PostponedPHIs);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
RHS = vectorizeOperand(E, 1, PostponedPHIs);
} else {
setInsertPointAfterBundle(E);
LHS = vectorizeOperand(E, 0, PostponedPHIs);
}
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
if (LHS && RHS &&
((Instruction::isBinaryOp(E->getOpcode()) &&
(LHS->getType() != VecTy || RHS->getType() != VecTy)) ||
(isa<CmpInst>(VL0) && LHS->getType() != RHS->getType()))) {
assert((It != MinBWs.end() || getOperandEntry(E, 0)->isGather() ||
getOperandEntry(E, 1)->isGather() ||
MinBWs.contains(getOperandEntry(E, 0)) ||
MinBWs.contains(getOperandEntry(E, 1))) &&
"Expected item in MinBWs.");
Type *CastTy = VecTy;
if (isa<CmpInst>(VL0) && LHS->getType() != RHS->getType()) {
if (cast<VectorType>(LHS->getType())
->getElementType()
->getIntegerBitWidth() < cast<VectorType>(RHS->getType())
->getElementType()
->getIntegerBitWidth())
CastTy = RHS->getType();
else
CastTy = LHS->getType();
}
if (LHS->getType() != CastTy)
LHS = Builder.CreateIntCast(LHS, CastTy, GetOperandSignedness(0));
if (RHS->getType() != CastTy)
RHS = Builder.CreateIntCast(RHS, CastTy, GetOperandSignedness(1));
}
Value *V0, *V1;
if (Instruction::isBinaryOp(E->getOpcode())) {
V0 = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
V1 = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
auto *AltCI = cast<CmpInst>(E->getAltOp());
CmpInst::Predicate AltPred = AltCI->getPredicate();
V1 = Builder.CreateCmp(AltPred, LHS, RHS);
} else {
if (LHS->getType()->isIntOrIntVectorTy() && ScalarTy->isIntegerTy()) {
unsigned SrcBWSz = DL->getTypeSizeInBits(
cast<VectorType>(LHS->getType())->getElementType());
unsigned BWSz = DL->getTypeSizeInBits(ScalarTy);
if (BWSz <= SrcBWSz) {
if (BWSz < SrcBWSz)
LHS = Builder.CreateIntCast(LHS, VecTy, It->second.first);
assert(LHS->getType() == VecTy && "Expected same type as operand.");
if (auto *I = dyn_cast<Instruction>(LHS))
LHS = propagateMetadata(I, E->Scalars);
E->VectorizedValue = LHS;
++NumVectorInstructions;
return LHS;
}
}
V0 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
V1 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy);
}
// Add V0 and V1 to later analysis to try to find and remove matching
// instruction, if any.
for (Value *V : {V0, V1}) {
if (auto *I = dyn_cast<Instruction>(V)) {
GatherShuffleExtractSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
}
// Create shuffle to take alternate operations from the vector.
// Also, gather up main and alt scalar ops to propagate IR flags to
// each vector operation.
ValueList OpScalars, AltScalars;
SmallVector<int> Mask;
E->buildAltOpShuffleMask(
[E, this](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
return isAlternateInstruction(I, E->getMainOp(), E->getAltOp(),
*TLI);
},
Mask, &OpScalars, &AltScalars);
propagateIRFlags(V0, OpScalars, E->getMainOp(), It == MinBWs.end());
propagateIRFlags(V1, AltScalars, E->getAltOp(), It == MinBWs.end());
auto DropNuwFlag = [&](Value *Vec, unsigned Opcode) {
// Drop nuw flags for abs(sub(commutative), true).
if (auto *I = dyn_cast<Instruction>(Vec);
I && Opcode == Instruction::Sub && !MinBWs.contains(E) &&
any_of(E->Scalars, [](Value *V) {
auto *IV = cast<Instruction>(V);
return IV->getOpcode() == Instruction::Sub &&
isCommutative(cast<Instruction>(IV));
}))
I->setHasNoUnsignedWrap(/*b=*/false);
};
DropNuwFlag(V0, E->getOpcode());
DropNuwFlag(V1, E->getAltOpcode());
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
if (auto *I = dyn_cast<Instruction>(V)) {
V = propagateMetadata(I, E->Scalars);
GatherShuffleExtractSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
default:
llvm_unreachable("unknown inst");
}
return nullptr;
}
Value *BoUpSLP::vectorizeTree() {
ExtraValueToDebugLocsMap ExternallyUsedValues;
SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
return vectorizeTree(ExternallyUsedValues, ReplacedExternals);
}
namespace {
/// Data type for handling buildvector sequences with the reused scalars from
/// other tree entries.
struct ShuffledInsertData {
/// List of insertelements to be replaced by shuffles.
SmallVector<InsertElementInst *> InsertElements;
/// The parent vectors and shuffle mask for the given list of inserts.
MapVector<Value *, SmallVector<int>> ValueMasks;
};
} // namespace
Value *BoUpSLP::vectorizeTree(
const ExtraValueToDebugLocsMap &ExternallyUsedValues,
SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
Instruction *ReductionRoot) {
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
}
// Clear the Entry-to-LastInstruction table; it can be invalidated by
// scheduling and needs to be rebuilt.
EntryToLastInstruction.clear();
if (ReductionRoot)
Builder.SetInsertPoint(ReductionRoot->getParent(),
ReductionRoot->getIterator());
else
Builder.SetInsertPoint(&F->getEntryBlock(), F->getEntryBlock().begin());
// Postpone emission of PHI operands to avoid cyclic dependency issues.
(void)vectorizeTree(VectorizableTree[0].get(), /*PostponedPHIs=*/true);
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree)
if (TE->State == TreeEntry::Vectorize &&
TE->getOpcode() == Instruction::PHI && !TE->isAltShuffle() &&
TE->VectorizedValue)
(void)vectorizeTree(TE.get(), /*PostponedPHIs=*/false);
// Run through the list of postponed gathers and emit them, replacing the
// temporarily emitted stub instructions with actual vector instructions.
ArrayRef<const TreeEntry *> PostponedNodes = PostponedGathers.getArrayRef();
DenseMap<Value *, SmallVector<TreeEntry *>> PostponedValues;
for (const TreeEntry *E : PostponedNodes) {
auto *TE = const_cast<TreeEntry *>(E);
if (auto *VecTE = getTreeEntry(TE->Scalars.front()))
if (VecTE->isSame(TE->UserTreeIndices.front().UserTE->getOperand(
TE->UserTreeIndices.front().EdgeIdx)) &&
VecTE->isSame(TE->Scalars))
// Found a gather node which is exactly the same as one of the
// vectorized nodes. This may happen after reordering.
continue;
auto *PrevVec = cast<Instruction>(TE->VectorizedValue);
TE->VectorizedValue = nullptr;
auto *UserI =
cast<Instruction>(TE->UserTreeIndices.front().UserTE->VectorizedValue);
// If the user is a PHI node, its vector code has to be inserted right
// before the block terminator. Since the node was delayed, there were
// some unresolved dependencies at the moment when the stub instruction
// was emitted. If any of these dependencies turn out to be an operand of
// another PHI coming from this same block, the position of the stub
// instruction becomes invalid. This is because the source vector that is
// supposed to feed this gather node was inserted at the end of the block
// [after the stub instruction]. So we need to adjust the insertion point
// again, to the end of the block.
if (isa<PHINode>(UserI)) {
// Insert before all users.
Instruction *InsertPt = PrevVec->getParent()->getTerminator();
for (User *U : PrevVec->users()) {
if (U == UserI)
continue;
auto *UI = dyn_cast<Instruction>(U);
if (!UI || isa<PHINode>(UI) || UI->getParent() != InsertPt->getParent())
continue;
if (UI->comesBefore(InsertPt))
InsertPt = UI;
}
Builder.SetInsertPoint(InsertPt);
} else {
Builder.SetInsertPoint(PrevVec);
}
Builder.SetCurrentDebugLocation(UserI->getDebugLoc());
Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false);
if (Vec->getType() != PrevVec->getType()) {
assert(Vec->getType()->isIntOrIntVectorTy() &&
PrevVec->getType()->isIntOrIntVectorTy() &&
"Expected integer vector types only.");
std::optional<bool> IsSigned;
for (Value *V : TE->Scalars) {
if (const TreeEntry *BaseTE = getTreeEntry(V)) {
auto It = MinBWs.find(BaseTE);
if (It != MinBWs.end()) {
IsSigned = IsSigned.value_or(false) || It->second.second;
if (*IsSigned)
break;
}
for (const TreeEntry *MNTE : MultiNodeScalars.lookup(V)) {
auto It = MinBWs.find(MNTE);
if (It != MinBWs.end()) {
IsSigned = IsSigned.value_or(false) || It->second.second;
if (*IsSigned)
break;
}
}
if (IsSigned.value_or(false))
break;
// Scan through gather nodes.
for (const TreeEntry *BVE : ValueToGatherNodes.lookup(V)) {
auto It = MinBWs.find(BVE);
if (It != MinBWs.end()) {
IsSigned = IsSigned.value_or(false) || It->second.second;
if (*IsSigned)
break;
}
}
if (IsSigned.value_or(false))
break;
if (auto *EE = dyn_cast<ExtractElementInst>(V)) {
IsSigned =
IsSigned.value_or(false) ||
!isKnownNonNegative(EE->getVectorOperand(), SimplifyQuery(*DL));
continue;
}
if (IsSigned.value_or(false))
break;
}
}
if (IsSigned.value_or(false)) {
// Final attempt - check user node.
auto It = MinBWs.find(TE->UserTreeIndices.front().UserTE);
if (It != MinBWs.end())
IsSigned = It->second.second;
}
assert(IsSigned &&
"Expected user node or perfect diamond match in MinBWs.");
Vec = Builder.CreateIntCast(Vec, PrevVec->getType(), *IsSigned);
}
PrevVec->replaceAllUsesWith(Vec);
PostponedValues.try_emplace(Vec).first->second.push_back(TE);
// Replace the stub vector node, if it was used before for one of the
// buildvector nodes already.
auto It = PostponedValues.find(PrevVec);
if (It != PostponedValues.end()) {
for (TreeEntry *VTE : It->getSecond())
VTE->VectorizedValue = Vec;
}
eraseInstruction(PrevVec);
}
LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
<< " values .\n");
SmallVector<ShuffledInsertData> ShuffledInserts;
// Maps vector instruction to original insertelement instruction
DenseMap<Value *, InsertElementInst *> VectorToInsertElement;
// Maps extract Scalar to the corresponding extractelement instruction in the
// basic block. Only one extractelement per block should be emitted.
DenseMap<Value *,
DenseMap<BasicBlock *, std::pair<Instruction *, Instruction *>>>
ScalarToEEs;
SmallDenseSet<Value *, 4> UsedInserts;
DenseMap<std::pair<Value *, Type *>, Value *> VectorCasts;
SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
llvm::User *User = ExternalUse.User;
// Skip users that we have already RAUWed. This happens when one instruction
// has multiple uses of the same value.
if (User && !is_contained(Scalar->users(), User))
continue;
TreeEntry *E = getTreeEntry(Scalar);
assert(E && "Invalid scalar");
assert(!E->isGather() && "Extracting from a gather list");
// Non-instruction pointers are not deleted, just skip them.
if (E->getOpcode() == Instruction::GetElementPtr &&
!isa<GetElementPtrInst>(Scalar))
continue;
Value *Vec = E->VectorizedValue;
assert(Vec && "Can't find vectorizable value");
Value *Lane = Builder.getInt32(ExternalUse.Lane);
auto ExtractAndExtendIfNeeded = [&](Value *Vec) {
if (Scalar->getType() != Vec->getType()) {
Value *Ex = nullptr;
Value *ExV = nullptr;
auto *GEP = dyn_cast<GetElementPtrInst>(Scalar);
bool ReplaceGEP = GEP && ExternalUsesAsGEPs.contains(GEP);
auto It = ScalarToEEs.find(Scalar);
if (It != ScalarToEEs.end()) {
// No need to emit many extracts, just move the only one in the
// current block.
auto EEIt = It->second.find(Builder.GetInsertBlock());
if (EEIt != It->second.end()) {
Instruction *I = EEIt->second.first;
if (Builder.GetInsertPoint() != Builder.GetInsertBlock()->end() &&
Builder.GetInsertPoint()->comesBefore(I)) {
I->moveBefore(*Builder.GetInsertPoint()->getParent(),
Builder.GetInsertPoint());
if (auto *CI = EEIt->second.second)
CI->moveAfter(I);
}
Ex = I;
ExV = EEIt->second.second ? EEIt->second.second : Ex;
}
}
if (!Ex) {
// "Reuse" the existing extract to improve final codegen.
if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) {
Value *V = ES->getVectorOperand();
if (const TreeEntry *ETE = getTreeEntry(V))
V = ETE->VectorizedValue;
Ex = Builder.CreateExtractElement(V, ES->getIndexOperand());
} else if (ReplaceGEP) {
// Leave the GEPs as-is; they are free in most cases and it is better to
// keep them as GEPs.
auto *CloneGEP = GEP->clone();
if (isa<Instruction>(Vec))
CloneGEP->insertBefore(*Builder.GetInsertBlock(),
Builder.GetInsertPoint());
else
CloneGEP->insertBefore(GEP);
if (GEP->hasName())
CloneGEP->takeName(GEP);
Ex = CloneGEP;
} else {
Ex = Builder.CreateExtractElement(Vec, Lane);
}
// If necessary, sign-extend or zero-extend ScalarRoot
// to the larger type.
ExV = Ex;
if (Scalar->getType() != Ex->getType())
ExV = Builder.CreateIntCast(Ex, Scalar->getType(),
MinBWs.find(E)->second.second);
if (auto *I = dyn_cast<Instruction>(Ex))
ScalarToEEs[Scalar].try_emplace(
Builder.GetInsertBlock(),
std::make_pair(I, cast<Instruction>(ExV)));
}
// The then-branch of the previous if may produce constants, since
// operand 0 might be a constant.
if (auto *ExI = dyn_cast<Instruction>(Ex)) {
GatherShuffleExtractSeq.insert(ExI);
CSEBlocks.insert(ExI->getParent());
}
return ExV;
}
assert(isa<FixedVectorType>(Scalar->getType()) &&
isa<InsertElementInst>(Scalar) &&
"In-tree scalar of vector type is not insertelement?");
auto *IE = cast<InsertElementInst>(Scalar);
VectorToInsertElement.try_emplace(Vec, IE);
return Vec;
};
// If User == nullptr, the Scalar remains as a scalar in the vectorized
// instructions or is used as an extra argument. Generate an ExtractElement
// instruction and update the record for this scalar in
// ExternallyUsedValues.
if (!User) {
if (!ScalarsWithNullptrUser.insert(Scalar).second)
continue;
assert((ExternallyUsedValues.count(Scalar) ||
Scalar->hasNUsesOrMore(UsesLimit) ||
any_of(Scalar->users(),
[&](llvm::User *U) {
if (ExternalUsesAsGEPs.contains(U))
return true;
TreeEntry *UseEntry = getTreeEntry(U);
return UseEntry &&
(UseEntry->State == TreeEntry::Vectorize ||
UseEntry->State ==
TreeEntry::StridedVectorize) &&
(E->State == TreeEntry::Vectorize ||
E->State == TreeEntry::StridedVectorize) &&
doesInTreeUserNeedToExtract(
Scalar,
cast<Instruction>(UseEntry->Scalars.front()),
TLI);
})) &&
"Scalar with nullptr User must be registered in "
"ExternallyUsedValues map or remain as scalar in vectorized "
"instructions");
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
if (auto *PHI = dyn_cast<PHINode>(VecI))
Builder.SetInsertPoint(PHI->getParent(),
PHI->getParent()->getFirstNonPHIIt());
else
Builder.SetInsertPoint(VecI->getParent(),
std::next(VecI->getIterator()));
} else {
Builder.SetInsertPoint(&F->getEntryBlock(), F->getEntryBlock().begin());
}
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
// Required to update internally referenced instructions.
Scalar->replaceAllUsesWith(NewInst);
ReplacedExternals.emplace_back(Scalar, NewInst);
continue;
}
if (auto *VU = dyn_cast<InsertElementInst>(User);
VU && VU->getOperand(1) == Scalar) {
// Skip if the scalar is another vector op or Vec is not an instruction.
if (!Scalar->getType()->isVectorTy() && isa<Instruction>(Vec)) {
if (auto *FTy = dyn_cast<FixedVectorType>(User->getType())) {
if (!UsedInserts.insert(VU).second)
continue;
// Need to use the original vector if the root is truncated.
auto BWIt = MinBWs.find(E);
if (BWIt != MinBWs.end() && Vec->getType() != VU->getType()) {
auto *ScalarTy = FTy->getElementType();
auto Key = std::make_pair(Vec, ScalarTy);
auto VecIt = VectorCasts.find(Key);
if (VecIt == VectorCasts.end()) {
IRBuilderBase::InsertPointGuard Guard(Builder);
if (auto *IVec = dyn_cast<PHINode>(Vec))
Builder.SetInsertPoint(
IVec->getParent()->getFirstNonPHIOrDbgOrLifetime());
else if (auto *IVec = dyn_cast<Instruction>(Vec))
Builder.SetInsertPoint(IVec->getNextNonDebugInstruction());
Vec = Builder.CreateIntCast(
Vec,
getWidenedType(
ScalarTy,
cast<FixedVectorType>(Vec->getType())->getNumElements()),
BWIt->second.second);
VectorCasts.try_emplace(Key, Vec);
} else {
Vec = VecIt->second;
}
}
std::optional<unsigned> InsertIdx = getElementIndex(VU);
if (InsertIdx) {
auto *It =
find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
// Checks if 2 insertelements are from the same buildvector.
InsertElementInst *VecInsert = Data.InsertElements.front();
return areTwoInsertFromSameBuildVector(
VU, VecInsert,
[](InsertElementInst *II) { return II->getOperand(0); });
});
unsigned Idx = *InsertIdx;
if (It == ShuffledInserts.end()) {
(void)ShuffledInserts.emplace_back();
It = std::next(ShuffledInserts.begin(),
ShuffledInserts.size() - 1);
SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
if (Mask.empty())
Mask.assign(FTy->getNumElements(), PoisonMaskElem);
// Find the insertvector vectorized in the tree, if any.
Value *Base = VU;
while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
if (IEBase != User &&
(!IEBase->hasOneUse() ||
getElementIndex(IEBase).value_or(Idx) == Idx))
break;
// Build the mask for the vectorized insertelement instructions.
if (const TreeEntry *E = getTreeEntry(IEBase)) {
do {
IEBase = cast<InsertElementInst>(Base);
int IEIdx = *getElementIndex(IEBase);
assert(Mask[IEIdx] == PoisonMaskElem &&
"InsertElementInstruction used already.");
Mask[IEIdx] = IEIdx;
Base = IEBase->getOperand(0);
} while (E == getTreeEntry(Base));
break;
}
Base = cast<InsertElementInst>(Base)->getOperand(0);
// After vectorization the def-use chain has changed, so we need to look
// through the original insertelement instructions in case they were
// replaced by vector instructions.
auto It = VectorToInsertElement.find(Base);
if (It != VectorToInsertElement.end())
Base = It->second;
}
}
SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
if (Mask.empty())
Mask.assign(FTy->getNumElements(), PoisonMaskElem);
Mask[Idx] = ExternalUse.Lane;
It->InsertElements.push_back(cast<InsertElementInst>(User));
continue;
}
}
}
}
// Generate extracts for out-of-tree users.
// Find the insertion point for the extractelement lane.
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
if (PHINode *PH = dyn_cast<PHINode>(User)) {
for (unsigned I : seq<unsigned>(0, PH->getNumIncomingValues())) {
if (PH->getIncomingValue(I) == Scalar) {
Instruction *IncomingTerminator =
PH->getIncomingBlock(I)->getTerminator();
if (isa<CatchSwitchInst>(IncomingTerminator)) {
Builder.SetInsertPoint(VecI->getParent(),
std::next(VecI->getIterator()));
} else {
Builder.SetInsertPoint(PH->getIncomingBlock(I)->getTerminator());
}
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
PH->setOperand(I, NewInst);
}
}
} else {
Builder.SetInsertPoint(cast<Instruction>(User));
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
User->replaceUsesOfWith(Scalar, NewInst);
}
} else {
Builder.SetInsertPoint(&F->getEntryBlock(), F->getEntryBlock().begin());
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
User->replaceUsesOfWith(Scalar, NewInst);
}
LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
}
auto CreateShuffle = [&](Value *V1, Value *V2, ArrayRef<int> Mask) {
SmallVector<int> CombinedMask1(Mask.size(), PoisonMaskElem);
SmallVector<int> CombinedMask2(Mask.size(), PoisonMaskElem);
int VF = cast<FixedVectorType>(V1->getType())->getNumElements();
for (int I = 0, E = Mask.size(); I < E; ++I) {
if (Mask[I] < VF)
CombinedMask1[I] = Mask[I];
else
CombinedMask2[I] = Mask[I] - VF;
}
ShuffleInstructionBuilder ShuffleBuilder(
cast<VectorType>(V1->getType())->getElementType(), Builder, *this);
ShuffleBuilder.add(V1, CombinedMask1);
if (V2)
ShuffleBuilder.add(V2, CombinedMask2);
return ShuffleBuilder.finalize(std::nullopt);
};
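// Illustrative example of the mask split done above: for 4-wide V1/V2 and
// Mask = <0, 5, 2, 7>, CombinedMask1 becomes <0, -, 2, -> (selecting from
// V1) and CombinedMask2 becomes <-, 1, -, 3> (selecting from V2), where '-'
// stands for PoisonMaskElem.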
auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef<int> Mask,
bool ForSingleMask) {
unsigned VF = Mask.size();
unsigned VecVF = cast<FixedVectorType>(Vec->getType())->getNumElements();
if (VF != VecVF) {
if (any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); })) {
Vec = CreateShuffle(Vec, nullptr, Mask);
return std::make_pair(Vec, true);
}
if (!ForSingleMask) {
SmallVector<int> ResizeMask(VF, PoisonMaskElem);
for (unsigned I = 0; I < VF; ++I) {
if (Mask[I] != PoisonMaskElem)
ResizeMask[Mask[I]] = Mask[I];
}
Vec = CreateShuffle(Vec, nullptr, ResizeMask);
}
}
return std::make_pair(Vec, false);
};
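// Illustrative example for ResizeToVF: with an 8-wide Vec and a 4-element
// mask <0, poison, 2, poison>, no index exceeds the mask VF, so a resizing
// shuffle with mask <0, poison, 2, poison> narrows Vec to 4 lanes while
// keeping the used lanes in place.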
// Perform shuffling of the vectorized tree entries for better handling of
// external extracts.
for (int I = 0, E = ShuffledInserts.size(); I < E; ++I) {
// Find the first and the last instruction in the list of insertelements.
sort(ShuffledInserts[I].InsertElements, isFirstInsertElement);
InsertElementInst *FirstInsert = ShuffledInserts[I].InsertElements.front();
InsertElementInst *LastInsert = ShuffledInserts[I].InsertElements.back();
Builder.SetInsertPoint(LastInsert);
auto Vector = ShuffledInserts[I].ValueMasks.takeVector();
Value *NewInst = performExtractsShuffleAction<Value>(
MutableArrayRef(Vector.data(), Vector.size()),
FirstInsert->getOperand(0),
[](Value *Vec) {
return cast<VectorType>(Vec->getType())
->getElementCount()
.getKnownMinValue();
},
ResizeToVF,
[FirstInsert, &CreateShuffle](ArrayRef<int> Mask,
ArrayRef<Value *> Vals) {
assert((Vals.size() == 1 || Vals.size() == 2) &&
"Expected exactly 1 or 2 input values.");
if (Vals.size() == 1) {
// Do not create shuffle if the mask is a simple identity
// non-resizing mask.
if (Mask.size() != cast<FixedVectorType>(Vals.front()->getType())
->getNumElements() ||
!ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
return CreateShuffle(Vals.front(), nullptr, Mask);
return Vals.front();
}
return CreateShuffle(Vals.front() ? Vals.front()
: FirstInsert->getOperand(0),
Vals.back(), Mask);
});
auto It = ShuffledInserts[I].InsertElements.rbegin();
// Rebuild buildvector chain.
InsertElementInst *II = nullptr;
if (It != ShuffledInserts[I].InsertElements.rend())
II = *It;
SmallVector<Instruction *> Inserts;
while (It != ShuffledInserts[I].InsertElements.rend()) {
assert(II && "Must be an insertelement instruction.");
if (*It == II)
++It;
else
Inserts.push_back(cast<Instruction>(II));
II = dyn_cast<InsertElementInst>(II->getOperand(0));
}
for (Instruction *II : reverse(Inserts)) {
II->replaceUsesOfWith(II->getOperand(0), NewInst);
if (auto *NewI = dyn_cast<Instruction>(NewInst))
if (II->getParent() == NewI->getParent() && II->comesBefore(NewI))
II->moveAfter(NewI);
NewInst = II;
}
LastInsert->replaceAllUsesWith(NewInst);
for (InsertElementInst *IE : reverse(ShuffledInserts[I].InsertElements)) {
IE->replaceUsesOfWith(IE->getOperand(0),
PoisonValue::get(IE->getOperand(0)->getType()));
IE->replaceUsesOfWith(IE->getOperand(1),
PoisonValue::get(IE->getOperand(1)->getType()));
eraseInstruction(IE);
}
CSEBlocks.insert(LastInsert->getParent());
}
SmallVector<Instruction *> RemovedInsts;
// For each vectorized value:
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
if (Entry->isGather())
continue;
assert(Entry->VectorizedValue && "Can't find vectorizable value");
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
if (Entry->getOpcode() == Instruction::GetElementPtr &&
!isa<GetElementPtrInst>(Scalar))
continue;
#ifndef NDEBUG
Type *Ty = Scalar->getType();
if (!Ty->isVoidTy()) {
for (User *U : Scalar->users()) {
LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
// It is legal to delete users in the ignorelist.
assert((getTreeEntry(U) ||
(UserIgnoreList && UserIgnoreList->contains(U)) ||
(isa_and_nonnull<Instruction>(U) &&
isDeleted(cast<Instruction>(U)))) &&
"Deleting out-of-tree value");
}
}
#endif
LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
auto *I = cast<Instruction>(Scalar);
RemovedInsts.push_back(I);
}
}
// Merge the DIAssignIDs from the about-to-be-deleted instructions into the
// new vector instruction.
if (auto *V = dyn_cast<Instruction>(VectorizableTree[0]->VectorizedValue))
V->mergeDIAssignID(RemovedInsts);
// Clear up reduction references, if any.
if (UserIgnoreList) {
for (Instruction *I : RemovedInsts) {
if (getTreeEntry(I)->Idx != 0)
continue;
SmallVector<SelectInst *> LogicalOpSelects;
I->replaceUsesWithIf(PoisonValue::get(I->getType()), [&](Use &U) {
// Do not replace the condition of a logical op expressed in select form.
bool IsPoisoningLogicalOp = isa<SelectInst>(U.getUser()) &&
(match(U.getUser(), m_LogicalAnd()) ||
match(U.getUser(), m_LogicalOr())) &&
U.getOperandNo() == 0;
if (IsPoisoningLogicalOp) {
LogicalOpSelects.push_back(cast<SelectInst>(U.getUser()));
return false;
}
return UserIgnoreList->contains(U.getUser());
});
// Replace conditions of the poisoning logical ops with the non-poison
// constant value.
for (SelectInst *SI : LogicalOpSelects)
SI->setCondition(Constant::getNullValue(SI->getCondition()->getType()));
}
}
// Retain to-be-deleted instructions for some debug-info bookkeeping and alias
// cache correctness.
// NOTE: removeInstructionAndOperands only marks the instruction for deletion
// - instructions are not deleted until later.
removeInstructionsAndOperands(ArrayRef(RemovedInsts));
Builder.ClearInsertionPoint();
InstrElementSize.clear();
const TreeEntry &RootTE = *VectorizableTree.front();
Value *Vec = RootTE.VectorizedValue;
if (auto It = MinBWs.find(&RootTE); ReductionBitWidth != 0 &&
It != MinBWs.end() &&
ReductionBitWidth != It->second.first) {
IRBuilder<>::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(ReductionRoot->getParent(),
ReductionRoot->getIterator());
Vec = Builder.CreateIntCast(
Vec,
VectorType::get(Builder.getIntNTy(ReductionBitWidth),
cast<VectorType>(Vec->getType())->getElementCount()),
It->second.second);
}
return Vec;
}
void BoUpSLP::optimizeGatherSequence() {
LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleExtractSeq.size()
<< " gather sequences instructions.\n");
// LICM InsertElementInst sequences.
for (Instruction *I : GatherShuffleExtractSeq) {
if (isDeleted(I))
continue;
// Check if this block is inside a loop.
Loop *L = LI->getLoopFor(I->getParent());
if (!L)
continue;
// Check if it has a preheader.
BasicBlock *PreHeader = L->getLoopPreheader();
if (!PreHeader)
continue;
// If the vector or the element that we insert into it are
// instructions that are defined in this basic block then we can't
// hoist this instruction.
if (any_of(I->operands(), [L](Value *V) {
auto *OpI = dyn_cast<Instruction>(V);
return OpI && L->contains(OpI);
}))
continue;
// We can hoist this instruction. Move it to the pre-header.
I->moveBefore(PreHeader->getTerminator());
CSEBlocks.insert(PreHeader);
}
// Make a list of all reachable blocks in our CSE queue.
SmallVector<const DomTreeNode *, 8> CSEWorkList;
CSEWorkList.reserve(CSEBlocks.size());
for (BasicBlock *BB : CSEBlocks)
if (DomTreeNode *N = DT->getNode(BB)) {
assert(DT->isReachableFromEntry(N));
CSEWorkList.push_back(N);
}
// Sort blocks by domination. This ensures we visit a block after all blocks
// dominating it are visited.
llvm::sort(CSEWorkList, [](const DomTreeNode *A, const DomTreeNode *B) {
assert((A == B) == (A->getDFSNumIn() == B->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
return A->getDFSNumIn() < B->getDFSNumIn();
});
// Less-defined shuffles can be replaced by more-defined copies. Of two
// shuffles with the same vector operands, one is less defined if each of
// its mask indices either matches the other's or is undef. E.g.
// shuffle %0, poison, <0, 0, 0, undef> is less defined than shuffle %0,
// poison, <0, 0, 0, 0>.
auto &&IsIdenticalOrLessDefined = [this](Instruction *I1, Instruction *I2,
SmallVectorImpl<int> &NewMask) {
if (I1->getType() != I2->getType())
return false;
auto *SI1 = dyn_cast<ShuffleVectorInst>(I1);
auto *SI2 = dyn_cast<ShuffleVectorInst>(I2);
if (!SI1 || !SI2)
return I1->isIdenticalTo(I2);
if (SI1->isIdenticalTo(SI2))
return true;
for (int I = 0, E = SI1->getNumOperands(); I < E; ++I)
if (SI1->getOperand(I) != SI2->getOperand(I))
return false;
// Check if the second instruction is more defined than the first one.
NewMask.assign(SI2->getShuffleMask().begin(), SI2->getShuffleMask().end());
ArrayRef<int> SM1 = SI1->getShuffleMask();
// Count trailing undefs in the mask to check the final number of used
// registers.
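// E.g. (illustrative) for SM1 = <0, 1, poison, poison> the two trailing
// poison lanes may let the shuffle fit into fewer vector registers; the
// check below only allows the replacement when dropping them does not
// reduce the number of parts.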
unsigned LastUndefsCnt = 0;
for (int I = 0, E = NewMask.size(); I < E; ++I) {
if (SM1[I] == PoisonMaskElem)
++LastUndefsCnt;
else
LastUndefsCnt = 0;
if (NewMask[I] != PoisonMaskElem && SM1[I] != PoisonMaskElem &&
NewMask[I] != SM1[I])
return false;
if (NewMask[I] == PoisonMaskElem)
NewMask[I] = SM1[I];
}
// Check if the trailing undefs actually change the final number of used
// vector registers.
return SM1.size() - LastUndefsCnt > 1 &&
TTI->getNumberOfParts(SI1->getType()) ==
TTI->getNumberOfParts(
getWidenedType(SI1->getType()->getElementType(),
SM1.size() - LastUndefsCnt));
};
// Perform O(N^2) search over the gather/shuffle sequences and merge identical
// instructions. TODO: We can further optimize this scan if we split the
// instructions into different buckets based on the insert lane.
SmallVector<Instruction *, 16> Visited;
for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) {
assert(*I &&
(I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
"Worklist not sorted properly!");
BasicBlock *BB = (*I)->getBlock();
// For all instructions in blocks containing gather sequences:
for (Instruction &In : llvm::make_early_inc_range(*BB)) {
if (isDeleted(&In))
continue;
if (!isa<InsertElementInst, ExtractElementInst, ShuffleVectorInst>(&In) &&
!GatherShuffleExtractSeq.contains(&In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
bool Replaced = false;
for (Instruction *&V : Visited) {
SmallVector<int> NewMask;
if (IsIdenticalOrLessDefined(&In, V, NewMask) &&
DT->dominates(V->getParent(), In.getParent())) {
In.replaceAllUsesWith(V);
eraseInstruction(&In);
if (auto *SI = dyn_cast<ShuffleVectorInst>(V))
if (!NewMask.empty())
SI->setShuffleMask(NewMask);
Replaced = true;
break;
}
if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) &&
GatherShuffleExtractSeq.contains(V) &&
IsIdenticalOrLessDefined(V, &In, NewMask) &&
DT->dominates(In.getParent(), V->getParent())) {
In.moveAfter(V);
V->replaceAllUsesWith(&In);
eraseInstruction(V);
if (auto *SI = dyn_cast<ShuffleVectorInst>(&In))
if (!NewMask.empty())
SI->setShuffleMask(NewMask);
V = &In;
Replaced = true;
break;
}
}
if (!Replaced) {
assert(!is_contained(Visited, &In));
Visited.push_back(&In);
}
}
}
CSEBlocks.clear();
GatherShuffleExtractSeq.clear();
}
BoUpSLP::ScheduleData *
BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
ScheduleData *Bundle = nullptr;
ScheduleData *PrevInBundle = nullptr;
for (Value *V : VL) {
if (doesNotNeedToBeScheduled(V))
continue;
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
"no ScheduleData for bundle member "
"(maybe not in same basic block)");
assert(BundleMember->isSchedulingEntity() &&
"bundle member already part of other bundle");
if (PrevInBundle) {
PrevInBundle->NextInBundle = BundleMember;
} else {
Bundle = BundleMember;
}
// Group the instructions to a bundle.
BundleMember->FirstInBundle = Bundle;
PrevInBundle = BundleMember;
}
assert(Bundle && "Failed to find schedule bundle");
return Bundle;
}
// Groups the instructions into a bundle (which is then a single scheduling
// entity) and schedules instructions until the bundle gets ready.
std::optional<BoUpSLP::ScheduleData *>
BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
// No need to schedule PHIs, insertelement, extractelement and extractvalue
// instructions.
if (isa<PHINode>(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue) ||
doesNotNeedToSchedule(VL))
return nullptr;
// Initialize the instruction bundle.
Instruction *OldScheduleEnd = ScheduleEnd;
LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.OpValue << "\n");
auto TryScheduleBundleImpl = [this, OldScheduleEnd, SLP](bool ReSchedule,
ScheduleData *Bundle) {
// The scheduling region got new instructions at the lower end (or it is a
// new region for the first bundle). This makes it necessary to
// recalculate all dependencies.
// It is seldom that this needs to be done a second time after adding the
// initial bundle to the region.
if (ScheduleEnd != OldScheduleEnd) {
for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode())
doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); });
ReSchedule = true;
}
if (Bundle) {
LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle
<< " in block " << BB->getName() << "\n");
calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP);
}
if (ReSchedule) {
resetSchedule();
initialFillReadyList(ReadyInsts);
}
// Now try to schedule the new bundle or (if no bundle) just calculate
// dependencies. As soon as the bundle is "ready" it means that there are no
// cyclic dependencies and we can schedule it. Note that it's important that
// we don't "schedule" the bundle yet (see cancelScheduling).
while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) &&
!ReadyInsts.empty()) {
ScheduleData *Picked = ReadyInsts.pop_back_val();
assert(Picked->isSchedulingEntity() && Picked->isReady() &&
"must be ready to schedule");
schedule(Picked, ReadyInsts);
}
};
// Make sure that the scheduling region contains all
// instructions of the bundle.
for (Value *V : VL) {
if (doesNotNeedToBeScheduled(V))
continue;
if (!extendSchedulingRegion(V, S)) {
// If the scheduling region got new instructions at the lower end (or it
// is a new region for the first bundle), all dependencies need to be
// recalculated. Otherwise the compiler may crash trying to calculate
// dependencies incorrectly and emit instructions in the wrong order
// during the actual scheduling.
TryScheduleBundleImpl(/*ReSchedule=*/false, nullptr);
return std::nullopt;
}
}
bool ReSchedule = false;
for (Value *V : VL) {
if (doesNotNeedToBeScheduled(V))
continue;
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
"no ScheduleData for bundle member (maybe not in same basic block)");
// Make sure we don't leave the pieces of the bundle in the ready list when
// the whole bundle might not be ready.
ReadyInsts.remove(BundleMember);
if (!BundleMember->IsScheduled)
continue;
// A bundle member was scheduled as a single instruction before and now
// needs to be scheduled as part of the bundle. We just get rid of the
// existing schedule.
LLVM_DEBUG(dbgs() << "SLP: reset schedule because " << *BundleMember
<< " was already scheduled\n");
ReSchedule = true;
}
auto *Bundle = buildBundle(VL);
TryScheduleBundleImpl(ReSchedule, Bundle);
if (!Bundle->isReady()) {
cancelScheduling(VL, S.OpValue);
return std::nullopt;
}
return Bundle;
}
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
Value *OpValue) {
if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps(OpValue) ||
doesNotNeedToSchedule(VL))
return;
if (doesNotNeedToBeScheduled(OpValue))
OpValue = *find_if_not(VL, doesNotNeedToBeScheduled);
ScheduleData *Bundle = getScheduleData(OpValue);
LLVM_DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n");
assert(!Bundle->IsScheduled &&
"Can't cancel bundle which is already scheduled");
assert(Bundle->isSchedulingEntity() &&
(Bundle->isPartOfBundle() || needToScheduleSingleInstruction(VL)) &&
"tried to unbundle something which is not a bundle");
// Remove the bundle from the ready list.
if (Bundle->isReady())
ReadyInsts.remove(Bundle);
// Un-bundle: make single instructions out of the bundle.
ScheduleData *BundleMember = Bundle;
while (BundleMember) {
assert(BundleMember->FirstInBundle == Bundle && "corrupt bundle links");
BundleMember->FirstInBundle = BundleMember;
ScheduleData *Next = BundleMember->NextInBundle;
BundleMember->NextInBundle = nullptr;
BundleMember->TE = nullptr;
if (BundleMember->unscheduledDepsInBundle() == 0) {
ReadyInsts.insert(BundleMember);
}
BundleMember = Next;
}
}
BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::allocateScheduleDataChunks() {
// Allocate a new ScheduleData for the instruction.
if (ChunkPos >= ChunkSize) {
ScheduleDataChunks.push_back(std::make_unique<ScheduleData[]>(ChunkSize));
ChunkPos = 0;
}
return &(ScheduleDataChunks.back()[ChunkPos++]);
}
bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
const InstructionsState &S) {
if (getScheduleData(V, isOneOf(S, V)))
return true;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
assert(!isa<PHINode>(I) && !isVectorLikeInstWithConstOps(I) &&
!doesNotNeedToBeScheduled(I) &&
"phi nodes/insertelements/extractelements/extractvalues don't need to "
"be scheduled");
auto &&CheckScheduleForI = [this, &S](Instruction *I) -> bool {
ScheduleData *ISD = getScheduleData(I);
if (!ISD)
return false;
assert(isInSchedulingRegion(ISD) &&
"ScheduleData not in scheduling region");
ScheduleData *SD = allocateScheduleDataChunks();
SD->Inst = I;
SD->init(SchedulingRegionID, S.OpValue);
ExtraScheduleDataMap[I][S.OpValue] = SD;
return true;
};
if (CheckScheduleForI(I))
return true;
if (!ScheduleStart) {
// It's the first instruction in the new region.
initScheduleData(I, I->getNextNode(), nullptr, nullptr);
ScheduleStart = I;
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
CheckScheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
return true;
}
// Search up and down at the same time, because we don't know if the new
// instruction is above or below the existing scheduling region.
// Ignore debug info (and other "AssumeLike" intrinsics) so that's not counted
// against the budget. Otherwise debug info could affect codegen.
BasicBlock::reverse_iterator UpIter =
++ScheduleStart->getIterator().getReverse();
BasicBlock::reverse_iterator UpperEnd = BB->rend();
BasicBlock::iterator DownIter = ScheduleEnd->getIterator();
BasicBlock::iterator LowerEnd = BB->end();
auto IsAssumeLikeIntr = [](const Instruction &I) {
if (auto *II = dyn_cast<IntrinsicInst>(&I))
return II->isAssumeLikeIntrinsic();
return false;
};
UpIter = std::find_if_not(UpIter, UpperEnd, IsAssumeLikeIntr);
DownIter = std::find_if_not(DownIter, LowerEnd, IsAssumeLikeIntr);
while (UpIter != UpperEnd && DownIter != LowerEnd && &*UpIter != I &&
&*DownIter != I) {
if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
LLVM_DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
return false;
}
++UpIter;
++DownIter;
UpIter = std::find_if_not(UpIter, UpperEnd, IsAssumeLikeIntr);
DownIter = std::find_if_not(DownIter, LowerEnd, IsAssumeLikeIntr);
}
if (DownIter == LowerEnd || (UpIter != UpperEnd && &*UpIter == I)) {
assert(I->getParent() == ScheduleStart->getParent() &&
"Instruction is in wrong basic block.");
initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
ScheduleStart = I;
if (isOneOf(S, I) != I)
CheckScheduleForI(I);
LLVM_DEBUG(dbgs() << "SLP: extend schedule region start to " << *I
<< "\n");
return true;
}
assert((UpIter == UpperEnd || (DownIter != LowerEnd && &*DownIter == I)) &&
"Expected to reach the top of the basic block or the instruction at "
"the lower end.");
assert(I->getParent() == ScheduleEnd->getParent() &&
"Instruction is in wrong basic block.");
initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion,
nullptr);
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
CheckScheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
return true;
}
void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
Instruction *ToI,
ScheduleData *PrevLoadStore,
ScheduleData *NextLoadStore) {
ScheduleData *CurrentLoadStore = PrevLoadStore;
for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) {
// No need to allocate data for non-schedulable instructions.
if (doesNotNeedToBeScheduled(I))
continue;
ScheduleData *SD = ScheduleDataMap.lookup(I);
if (!SD) {
SD = allocateScheduleDataChunks();
ScheduleDataMap[I] = SD;
SD->Inst = I;
}
assert(!isInSchedulingRegion(SD) &&
"new ScheduleData already in scheduling region");
SD->init(SchedulingRegionID, I);
if (I->mayReadOrWriteMemory() &&
(!isa<IntrinsicInst>(I) ||
(cast<IntrinsicInst>(I)->getIntrinsicID() != Intrinsic::sideeffect &&
cast<IntrinsicInst>(I)->getIntrinsicID() !=
Intrinsic::pseudoprobe))) {
// Update the linked list of memory accessing instructions.
if (CurrentLoadStore) {
CurrentLoadStore->NextLoadStore = SD;
} else {
FirstLoadStoreInRegion = SD;
}
CurrentLoadStore = SD;
}
if (match(I, m_Intrinsic<Intrinsic::stacksave>()) ||
match(I, m_Intrinsic<Intrinsic::stackrestore>()))
RegionHasStackSave = true;
}
if (NextLoadStore) {
if (CurrentLoadStore)
CurrentLoadStore->NextLoadStore = NextLoadStore;
} else {
LastLoadStoreInRegion = CurrentLoadStore;
}
}
void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
bool InsertInReadyList,
BoUpSLP *SLP) {
assert(SD->isSchedulingEntity());
SmallVector<ScheduleData *, 10> WorkList;
WorkList.push_back(SD);
while (!WorkList.empty()) {
ScheduleData *SD = WorkList.pop_back_val();
for (ScheduleData *BundleMember = SD; BundleMember;
BundleMember = BundleMember->NextInBundle) {
assert(isInSchedulingRegion(BundleMember));
if (BundleMember->hasValidDependencies())
continue;
LLVM_DEBUG(dbgs() << "SLP: update deps of " << *BundleMember
<< "\n");
BundleMember->Dependencies = 0;
BundleMember->resetUnscheduledDeps();
// Handle def-use chain dependencies.
if (BundleMember->OpValue != BundleMember->Inst) {
if (ScheduleData *UseSD = getScheduleData(BundleMember->Inst)) {
BundleMember->Dependencies++;
ScheduleData *DestBundle = UseSD->FirstInBundle;
if (!DestBundle->IsScheduled)
BundleMember->incrementUnscheduledDeps(1);
if (!DestBundle->hasValidDependencies())
WorkList.push_back(DestBundle);
}
} else {
for (User *U : BundleMember->Inst->users()) {
if (ScheduleData *UseSD = getScheduleData(cast<Instruction>(U))) {
BundleMember->Dependencies++;
ScheduleData *DestBundle = UseSD->FirstInBundle;
if (!DestBundle->IsScheduled)
BundleMember->incrementUnscheduledDeps(1);
if (!DestBundle->hasValidDependencies())
WorkList.push_back(DestBundle);
}
}
}
auto MakeControlDependent = [&](Instruction *I) {
auto *DepDest = getScheduleData(I);
assert(DepDest && "must be in schedule window");
DepDest->ControlDependencies.push_back(BundleMember);
BundleMember->Dependencies++;
ScheduleData *DestBundle = DepDest->FirstInBundle;
if (!DestBundle->IsScheduled)
BundleMember->incrementUnscheduledDeps(1);
if (!DestBundle->hasValidDependencies())
WorkList.push_back(DestBundle);
};
// Any instruction which isn't safe to speculate at the beginning of the
// block is control dependent on any early exit or non-willreturn call
// which precedes it.
if (!isGuaranteedToTransferExecutionToSuccessor(BundleMember->Inst)) {
for (Instruction *I = BundleMember->Inst->getNextNode();
I != ScheduleEnd; I = I->getNextNode()) {
if (isSafeToSpeculativelyExecute(I, &*BB->begin(), SLP->AC))
continue;
// Add the dependency
MakeControlDependent(I);
if (!isGuaranteedToTransferExecutionToSuccessor(I))
// Everything past here must be control dependent on I.
break;
}
}
if (RegionHasStackSave) {
// If we have an inalloca alloca instruction, it needs to be scheduled
// after any preceding stacksave. We also need to prevent any alloca
// from reordering above a preceding stackrestore.
if (match(BundleMember->Inst, m_Intrinsic<Intrinsic::stacksave>()) ||
match(BundleMember->Inst, m_Intrinsic<Intrinsic::stackrestore>())) {
for (Instruction *I = BundleMember->Inst->getNextNode();
I != ScheduleEnd; I = I->getNextNode()) {
if (match(I, m_Intrinsic<Intrinsic::stacksave>()) ||
match(I, m_Intrinsic<Intrinsic::stackrestore>()))
// Any allocas past here must be control dependent on I, and I
// must be memory dependent on BundleMember->Inst.
break;
if (!isa<AllocaInst>(I))
continue;
// Add the dependency
MakeControlDependent(I);
}
}
// In addition to the cases handled just above, we need to prevent
// allocas and loads/stores from moving below a stacksave or a
// stackrestore. Avoiding moving allocas below a stackrestore is currently
// thought to be conservatism. Moving loads/stores below a stackrestore
// can lead to incorrect code.
if (isa<AllocaInst>(BundleMember->Inst) ||
BundleMember->Inst->mayReadOrWriteMemory()) {
for (Instruction *I = BundleMember->Inst->getNextNode();
I != ScheduleEnd; I = I->getNextNode()) {
if (!match(I, m_Intrinsic<Intrinsic::stacksave>()) &&
!match(I, m_Intrinsic<Intrinsic::stackrestore>()))
continue;
// Add the dependency
MakeControlDependent(I);
break;
}
}
}
// Handle the memory dependencies (if any).
ScheduleData *DepDest = BundleMember->NextLoadStore;
if (!DepDest)
continue;
Instruction *SrcInst = BundleMember->Inst;
assert(SrcInst->mayReadOrWriteMemory() &&
"NextLoadStore list for a non-memory-affecting bundle?");
MemoryLocation SrcLoc = getLocation(SrcInst);
bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
unsigned NumAliased = 0;
unsigned DistToSrc = 1;
for (; DepDest; DepDest = DepDest->NextLoadStore) {
assert(isInSchedulingRegion(DepDest));
// We have two limits to reduce the complexity:
// 1) AliasedCheckLimit: It's a small limit to reduce calls to
// SLP->isAliased (which is the expensive part in this loop).
// 2) MaxMemDepDistance: It's for very large blocks and it aborts
// the whole loop (even if the loop is fast, it's quadratic).
// It's important for the loop break condition (see below) to
// check this limit even between two read-only instructions.
if (DistToSrc >= MaxMemDepDistance ||
((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
(NumAliased >= AliasedCheckLimit ||
SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {
// We increment the counter only if the locations are aliased
// (instead of counting all alias checks). This gives a better
// balance between reduced runtime and accurate dependencies.
NumAliased++;
DepDest->MemoryDependencies.push_back(BundleMember);
BundleMember->Dependencies++;
ScheduleData *DestBundle = DepDest->FirstInBundle;
if (!DestBundle->IsScheduled) {
BundleMember->incrementUnscheduledDeps(1);
}
if (!DestBundle->hasValidDependencies()) {
WorkList.push_back(DestBundle);
}
}
// Example, explaining the loop break condition: Let's assume our
// starting instruction is i0 and MaxMemDepDistance = 3.
//
// +--------v--v--v
// i0,i1,i2,i3,i4,i5,i6,i7,i8
// +--------^--^--^
//
// MaxMemDepDistance lets us stop alias-checking at i3 and we add
// dependencies from i0 to i3,i4,.. (even if they are not aliased).
// Previously we already added dependencies from i3 to i6,i7,i8
// (because of MaxMemDepDistance). As we added a dependency from
// i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
// and we can abort this loop at i6.
if (DistToSrc >= 2 * MaxMemDepDistance)
break;
DistToSrc++;
}
}
if (InsertInReadyList && SD->isReady()) {
ReadyInsts.insert(SD);
LLVM_DEBUG(dbgs() << "SLP: gets ready on update: " << *SD->Inst
<< "\n");
}
}
}
void BoUpSLP::BlockScheduling::resetSchedule() {
assert(ScheduleStart &&
"tried to reset schedule on block which has not been scheduled");
for (Instruction *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
doForAllOpcodes(I, [&](ScheduleData *SD) {
assert(isInSchedulingRegion(SD) &&
"ScheduleData not in scheduling region");
SD->IsScheduled = false;
SD->resetUnscheduledDeps();
});
}
ReadyInsts.clear();
}
void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
if (!BS->ScheduleStart)
return;
LLVM_DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");
// A key point - if we got here, pre-scheduling was able to find a valid
// scheduling of the sub-graph of the scheduling window which consists
// of all vector bundles and their transitive users. As such, we do not
// need to reschedule anything *outside of* that subgraph.
BS->resetSchedule();
// For the real scheduling we use a more sophisticated ready-list: it is
// sorted by the original instruction location. This lets the final schedule
// be as close as possible to the original instruction order.
// WARNING: If changing this order causes a correctness issue, that means
// there is some missing dependence edge in the schedule data graph.
struct ScheduleDataCompare {
bool operator()(ScheduleData *SD1, ScheduleData *SD2) const {
return SD2->SchedulingPriority < SD1->SchedulingPriority;
}
};
std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;
// Ensure that all dependency data is updated (for nodes in the sub-graph)
// and fill the ready-list with initial instructions.
int Idx = 0;
for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
I = I->getNextNode()) {
BS->doForAllOpcodes(I, [this, &Idx, BS](ScheduleData *SD) {
TreeEntry *SDTE = getTreeEntry(SD->Inst);
(void)SDTE;
assert((isVectorLikeInstWithConstOps(SD->Inst) ||
SD->isPartOfBundle() ==
(SDTE && !doesNotNeedToSchedule(SDTE->Scalars))) &&
"scheduler and vectorizer bundle mismatch");
SD->FirstInBundle->SchedulingPriority = Idx++;
if (SD->isSchedulingEntity() && SD->isPartOfBundle())
BS->calculateDependencies(SD, false, this);
});
}
BS->initialFillReadyList(ReadyInsts);
Instruction *LastScheduledInst = BS->ScheduleEnd;
// Do the "real" scheduling.
while (!ReadyInsts.empty()) {
ScheduleData *Picked = *ReadyInsts.begin();
ReadyInsts.erase(ReadyInsts.begin());
// Move the scheduled instruction(s) to their dedicated places, if not
// there yet.
for (ScheduleData *BundleMember = Picked; BundleMember;
BundleMember = BundleMember->NextInBundle) {
Instruction *PickedInst = BundleMember->Inst;
if (PickedInst->getNextNonDebugInstruction() != LastScheduledInst)
PickedInst->moveAfter(LastScheduledInst->getPrevNode());
LastScheduledInst = PickedInst;
}
BS->schedule(Picked, ReadyInsts);
}
// Check that we didn't break any of our invariants.
#ifdef EXPENSIVE_CHECKS
BS->verify();
#endif
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
// Check that all schedulable entities got scheduled
for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; I = I->getNextNode()) {
BS->doForAllOpcodes(I, [&](ScheduleData *SD) {
if (SD->isSchedulingEntity() && SD->hasValidDependencies()) {
assert(SD->IsScheduled && "must be scheduled at this point");
}
});
}
#endif
// Avoid duplicate scheduling of the block.
BS->ScheduleStart = nullptr;
}
unsigned BoUpSLP::getVectorElementSize(Value *V) {
// If V is a store, just return the width of the stored value (or value
// truncated just before storing) without traversing the expression tree.
// This is the common case.
if (auto *Store = dyn_cast<StoreInst>(V))
return DL->getTypeSizeInBits(Store->getValueOperand()->getType());
if (auto *IEI = dyn_cast<InsertElementInst>(V))
return getVectorElementSize(IEI->getOperand(1));
auto E = InstrElementSize.find(V);
if (E != InstrElementSize.end())
return E->second;
// If V is not a store, we can traverse the expression tree to find loads
// that feed it. The type of the loaded value may indicate a more suitable
// width than V's type. We want to base the vector element size on the width
// of memory operations where possible.
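// Illustrative example: for
//   %l = load i8, ptr %p
//   %e = zext i8 %l to i32
// the walk below reaches the load and yields 8 rather than the 32 bits of
// %e's type.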
SmallVector<std::tuple<Instruction *, BasicBlock *, unsigned>> Worklist;
SmallPtrSet<Instruction *, 16> Visited;
if (auto *I = dyn_cast<Instruction>(V)) {
Worklist.emplace_back(I, I->getParent(), 0);
Visited.insert(I);
}
// Traverse the expression tree in bottom-up order looking for loads. If we
// encounter an instruction we don't yet handle, we give up.
auto Width = 0u;
Value *FirstNonBool = nullptr;
while (!Worklist.empty()) {
auto [I, Parent, Level] = Worklist.pop_back_val();
// We should only be looking at scalar instructions here. If the current
// instruction has a vector type, skip.
auto *Ty = I->getType();
if (isa<VectorType>(Ty))
continue;
if (Ty != Builder.getInt1Ty() && !FirstNonBool)
FirstNonBool = I;
if (Level > RecursionMaxDepth)
continue;
// If the current instruction is a load, update Width to reflect the
// width of the loaded value.
if (isa<LoadInst, ExtractElementInst, ExtractValueInst>(I))
Width = std::max<unsigned>(Width, DL->getTypeSizeInBits(Ty));
// Otherwise, we need to visit the operands of the instruction. We only
// handle the interesting cases from buildTree here. If an operand is an
// instruction we haven't yet visited and from the same basic block as the
// user or the use is a PHI node, we add it to the worklist.
else if (isa<PHINode, CastInst, GetElementPtrInst, CmpInst, SelectInst,
BinaryOperator, UnaryOperator>(I)) {
for (Use &U : I->operands()) {
if (auto *J = dyn_cast<Instruction>(U.get()))
if (Visited.insert(J).second &&
(isa<PHINode>(I) || J->getParent() == Parent)) {
Worklist.emplace_back(J, J->getParent(), Level + 1);
continue;
}
if (!FirstNonBool && U.get()->getType() != Builder.getInt1Ty())
FirstNonBool = U.get();
}
} else {
break;
}
}
// If we didn't encounter a memory access in the expression tree, or if we
// gave up for some reason, just return the width of V. Otherwise, return the
// maximum width we found.
if (!Width) {
if (V->getType() == Builder.getInt1Ty() && FirstNonBool)
V = FirstNonBool;
Width = DL->getTypeSizeInBits(V->getType());
}
for (Instruction *I : Visited)
InstrElementSize[I] = Width;
return Width;
}
bool BoUpSLP::collectValuesToDemote(
const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
unsigned &MaxDepthLevel, bool &IsProfitableToDemote,
bool IsTruncRoot) const {
// We can always demote constants.
if (all_of(E.Scalars, IsaPred<Constant>))
return true;
unsigned OrigBitWidth = DL->getTypeSizeInBits(E.Scalars.front()->getType());
if (OrigBitWidth == BitWidth) {
MaxDepthLevel = 1;
return true;
}
// If the value is not a vectorized instruction in the expression, is not
// used by an insertelement instruction, and is not used in multiple vector
// nodes, it cannot be demoted.
bool IsSignedNode = any_of(E.Scalars, [&](Value *R) {
return !isKnownNonNegative(R, SimplifyQuery(*DL));
});
auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool {
if (MultiNodeScalars.contains(V))
return false;
// For the last shuffle of sext/zext with many uses, we need to check the
// extra bit for unsigned values; otherwise we may get incorrect casting
// for reused scalars.
bool IsSignedVal = !isKnownNonNegative(V, SimplifyQuery(*DL));
if ((!IsSignedNode || IsSignedVal) && OrigBitWidth > BitWidth) {
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)))
return true;
}
unsigned NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT);
unsigned BitWidth1 = OrigBitWidth - NumSignBits;
if (IsSignedNode)
++BitWidth1;
if (auto *I = dyn_cast<Instruction>(V)) {
APInt Mask = DB->getDemandedBits(I);
unsigned BitWidth2 =
std::max<unsigned>(1, Mask.getBitWidth() - Mask.countl_zero());
while (!IsSignedNode && BitWidth2 < OrigBitWidth) {
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth2 - 1);
if (MaskedValueIsZero(V, Mask, SimplifyQuery(*DL)))
break;
BitWidth2 *= 2;
}
BitWidth1 = std::min(BitWidth1, BitWidth2);
}
BitWidth = std::max(BitWidth, BitWidth1);
return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2);
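// A sketch of the arithmetic above (illustrative numbers): for a 32-bit
// value with 25 known sign bits, BitWidth1 = 32 - 25 = 7 (8 for a signed
// node), and since 32 >= 2 * 8, demoting the value is considered safe.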
};
using namespace std::placeholders;
auto FinalAnalysis = [&]() {
if (!IsProfitableToDemote)
return false;
bool Res = all_of(
E.Scalars, std::bind(IsPotentiallyTruncated, _1, std::ref(BitWidth)));
// Demote gathers.
if (Res && E.isGather()) {
// Check the possible extractelement instruction bases and the final
// vector length.
SmallPtrSet<Value *, 4> UniqueBases;
for (Value *V : E.Scalars) {
auto *EE = dyn_cast<ExtractElementInst>(V);
if (!EE)
continue;
UniqueBases.insert(EE->getVectorOperand());
}
const unsigned VF = E.Scalars.size();
Type *OrigScalarTy = E.Scalars.front()->getType();
if (UniqueBases.size() <= 2 ||
TTI->getNumberOfParts(getWidenedType(OrigScalarTy, VF)) ==
TTI->getNumberOfParts(getWidenedType(
IntegerType::get(OrigScalarTy->getContext(), BitWidth), VF)))
ToDemote.push_back(E.Idx);
}
return Res;
};
if (E.isGather() || !Visited.insert(&E).second ||
any_of(E.Scalars, [&](Value *V) {
return all_of(V->users(), [&](User *U) {
return isa<InsertElementInst>(U) && !getTreeEntry(U);
});
}))
return FinalAnalysis();
if (any_of(E.Scalars, [&](Value *V) {
return !all_of(V->users(), [=](User *U) {
return getTreeEntry(U) ||
- (UserIgnoreList && UserIgnoreList->contains(U)) ||
+ (E.Idx == 0 && UserIgnoreList &&
+ UserIgnoreList->contains(U)) ||
(!isa<CmpInst>(U) && U->getType()->isSized() &&
!U->getType()->isScalableTy() &&
DL->getTypeSizeInBits(U->getType()) <= BitWidth);
}) && !IsPotentiallyTruncated(V, BitWidth);
}))
return false;
auto ProcessOperands = [&](ArrayRef<const TreeEntry *> Operands,
bool &NeedToExit) {
NeedToExit = false;
unsigned InitLevel = MaxDepthLevel;
for (const TreeEntry *Op : Operands) {
unsigned Level = InitLevel;
if (!collectValuesToDemote(*Op, IsProfitableToDemoteRoot, BitWidth,
ToDemote, Visited, Level, IsProfitableToDemote,
IsTruncRoot)) {
if (!IsProfitableToDemote)
return false;
NeedToExit = true;
if (!FinalAnalysis())
return false;
continue;
}
MaxDepthLevel = std::max(MaxDepthLevel, Level);
}
return true;
};
auto AttemptCheckBitwidth =
[&](function_ref<bool(unsigned, unsigned)> Checker, bool &NeedToExit) {
// Try all bitwidths < OrigBitWidth.
NeedToExit = false;
unsigned BestFailBitwidth = 0;
for (; BitWidth < OrigBitWidth; BitWidth *= 2) {
if (Checker(BitWidth, OrigBitWidth))
return true;
if (BestFailBitwidth == 0 && FinalAnalysis())
BestFailBitwidth = BitWidth;
}
if (BitWidth >= OrigBitWidth) {
if (BestFailBitwidth == 0) {
BitWidth = OrigBitWidth;
return false;
}
MaxDepthLevel = 1;
BitWidth = BestFailBitwidth;
NeedToExit = true;
return true;
}
return false;
};
auto TryProcessInstruction =
[&](unsigned &BitWidth,
ArrayRef<const TreeEntry *> Operands = std::nullopt,
function_ref<bool(unsigned, unsigned)> Checker = {}) {
if (Operands.empty()) {
if (!IsTruncRoot)
MaxDepthLevel = 1;
(void)for_each(E.Scalars, std::bind(IsPotentiallyTruncated, _1,
std::ref(BitWidth)));
} else {
// Several vectorized uses? Check if we can truncate it; otherwise,
// exit.
if (E.UserTreeIndices.size() > 1 &&
!all_of(E.Scalars, std::bind(IsPotentiallyTruncated, _1,
std::ref(BitWidth))))
return false;
bool NeedToExit = false;
if (Checker && !AttemptCheckBitwidth(Checker, NeedToExit))
return false;
if (NeedToExit)
return true;
if (!ProcessOperands(Operands, NeedToExit))
return false;
if (NeedToExit)
return true;
}
++MaxDepthLevel;
// Record the entry that we can demote.
ToDemote.push_back(E.Idx);
return IsProfitableToDemote;
};
switch (E.getOpcode()) {
// We can always demote truncations and extensions. Since truncations can
// seed additional demotion, we save the truncated value.
case Instruction::Trunc:
if (IsProfitableToDemoteRoot)
IsProfitableToDemote = true;
return TryProcessInstruction(BitWidth);
case Instruction::ZExt:
case Instruction::SExt:
IsProfitableToDemote = true;
return TryProcessInstruction(BitWidth);
// We can demote certain binary operations if we can demote both of their
// operands.
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
return TryProcessInstruction(
BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)});
}
case Instruction::Shl: {
// If we are truncating the result of this SHL, and if it's a shift of an
// in-range amount, we can always perform a SHL in a smaller type.
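// E.g. (illustrative) truncating (shl i32 %x, 3) to i16 is fine: the shift
// amount 3 is known to be less than the narrow width 16.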
auto ShlChecker = [&](unsigned BitWidth, unsigned) {
return all_of(E.Scalars, [&](Value *V) {
auto *I = cast<Instruction>(V);
KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
return AmtKnownBits.getMaxValue().ult(BitWidth);
});
};
return TryProcessInstruction(
BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}, ShlChecker);
}
case Instruction::LShr: {
// If this is a truncate of a logical shr, we can truncate it to a smaller
// lshr iff we know that the bits we would otherwise be shifting in are
// already zeros.
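// E.g. (illustrative) (lshr i32 %x, 4) can become a 16-bit lshr when the
// top 16 bits of %x are known zero, since only zeros would be shifted in.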
auto LShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
return all_of(E.Scalars, [&](Value *V) {
auto *I = cast<Instruction>(V);
KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
return AmtKnownBits.getMaxValue().ult(BitWidth) &&
MaskedValueIsZero(I->getOperand(0), ShiftedBits,
SimplifyQuery(*DL));
});
};
return TryProcessInstruction(
BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)},
LShrChecker);
}
case Instruction::AShr: {
// If this is a truncate of an arithmetic shr, we can truncate it to a
// smaller ashr iff we know that all the bits between the sign bit of the
// original type and the sign bit of the truncated type are the same.
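// E.g. (illustrative) (ashr i32 %x, 2) can become a 16-bit ashr when %x has
// at least 17 sign bits, i.e. all dropped bits are copies of the sign bit.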
auto AShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
return all_of(E.Scalars, [&](Value *V) {
auto *I = cast<Instruction>(V);
KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
unsigned ShiftedBits = OrigBitWidth - BitWidth;
return AmtKnownBits.getMaxValue().ult(BitWidth) &&
ShiftedBits < ComputeNumSignBits(I->getOperand(0), *DL, 0, AC,
nullptr, DT);
});
};
return TryProcessInstruction(
BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)},
AShrChecker);
}
case Instruction::UDiv:
case Instruction::URem: {
// UDiv and URem can be truncated if all the truncated bits are zero.
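// E.g. (illustrative) (udiv i32 %a, %b) can be narrowed to i16 when the
// high 16 bits of both %a and %b are known zero; the quotient then fits in
// 16 bits as well.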
auto Checker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
return all_of(E.Scalars, [&](Value *V) {
auto *I = cast<Instruction>(V);
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) &&
MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL));
});
};
return TryProcessInstruction(
BitWidth, {getOperandEntry(&E, 0), getOperandEntry(&E, 1)}, Checker);
}
// We can demote selects if we can demote their true and false values.
case Instruction::Select: {
return TryProcessInstruction(
BitWidth, {getOperandEntry(&E, 1), getOperandEntry(&E, 2)});
}
// We can demote phis if we can demote all their incoming operands. Note that
// we don't need to worry about cycles since we ensure single use above.
case Instruction::PHI: {
const unsigned NumOps = E.getNumOperands();
SmallVector<const TreeEntry *> Ops(NumOps);
transform(seq<unsigned>(0, NumOps), Ops.begin(),
std::bind(&BoUpSLP::getOperandEntry, this, &E, _1));
return TryProcessInstruction(BitWidth, Ops);
}
case Instruction::Call: {
auto *IC = dyn_cast<IntrinsicInst>(E.getMainOp());
if (!IC)
break;
Intrinsic::ID ID = getVectorIntrinsicIDForCall(IC, TLI);
if (ID != Intrinsic::abs && ID != Intrinsic::smin &&
ID != Intrinsic::smax && ID != Intrinsic::umin && ID != Intrinsic::umax)
break;
SmallVector<const TreeEntry *, 2> Operands(1, getOperandEntry(&E, 0));
function_ref<bool(unsigned, unsigned)> CallChecker;
auto CompChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
return all_of(E.Scalars, [&](Value *V) {
auto *I = cast<Instruction>(V);
if (ID == Intrinsic::umin || ID == Intrinsic::umax) {
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
return MaskedValueIsZero(I->getOperand(0), Mask,
SimplifyQuery(*DL)) &&
MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL));
}
assert((ID == Intrinsic::smin || ID == Intrinsic::smax) &&
"Expected min/max intrinsics only.");
unsigned SignBits = OrigBitWidth - BitWidth;
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1);
unsigned Op0SignBits = ComputeNumSignBits(I->getOperand(0), *DL, 0, AC,
nullptr, DT);
unsigned Op1SignBits = ComputeNumSignBits(I->getOperand(1), *DL, 0, AC,
nullptr, DT);
return SignBits <= Op0SignBits &&
((SignBits != Op0SignBits &&
!isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) ||
MaskedValueIsZero(I->getOperand(0), Mask,
SimplifyQuery(*DL))) &&
SignBits <= Op1SignBits &&
((SignBits != Op1SignBits &&
!isKnownNonNegative(I->getOperand(1), SimplifyQuery(*DL))) ||
MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)));
});
};
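// Roughly (illustrative summary of the checker above): umin/umax only need
// the dropped high bits of both operands to be known zero, while smin/smax
// need each operand to keep more sign bits than are dropped (or known-zero
// high bits for a non-negative operand).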
if (ID != Intrinsic::abs) {
Operands.push_back(getOperandEntry(&E, 1));
CallChecker = CompChecker;
}
InstructionCost BestCost =
std::numeric_limits<InstructionCost::CostType>::max();
unsigned BestBitWidth = BitWidth;
unsigned VF = E.Scalars.size();
// Choose the best bitwidth based on cost estimations.
auto Checker = [&](unsigned BitWidth, unsigned) {
unsigned MinBW = PowerOf2Ceil(BitWidth);
SmallVector<Type *> ArgTys = buildIntrinsicArgTypes(IC, ID, VF, MinBW);
auto VecCallCosts = getVectorCallCosts(
IC, getWidenedType(IntegerType::get(IC->getContext(), MinBW), VF),
TTI, TLI, ArgTys);
InstructionCost Cost = std::min(VecCallCosts.first, VecCallCosts.second);
if (Cost < BestCost) {
BestCost = Cost;
BestBitWidth = BitWidth;
}
return false;
};
[[maybe_unused]] bool NeedToExit;
(void)AttemptCheckBitwidth(Checker, NeedToExit);
BitWidth = BestBitWidth;
return TryProcessInstruction(BitWidth, Operands, CallChecker);
}
// Otherwise, conservatively give up.
default:
break;
}
MaxDepthLevel = 1;
return FinalAnalysis();
}
static RecurKind getRdxKind(Value *V);
void BoUpSLP::computeMinimumValueSizes() {
// We only attempt to truncate integer expressions.
bool IsStoreOrInsertElt =
VectorizableTree.front()->getOpcode() == Instruction::Store ||
VectorizableTree.front()->getOpcode() == Instruction::InsertElement;
if ((IsStoreOrInsertElt || UserIgnoreList) &&
ExtraBitWidthNodes.size() <= 1 &&
(!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 ||
CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2))
return;
unsigned NodeIdx = 0;
if (IsStoreOrInsertElt && !VectorizableTree.front()->isGather())
NodeIdx = 1;
// Ensure the roots of the vectorizable tree don't form a cycle.
if (VectorizableTree[NodeIdx]->isGather() ||
(NodeIdx == 0 && !VectorizableTree[NodeIdx]->UserTreeIndices.empty()) ||
(NodeIdx != 0 && any_of(VectorizableTree[NodeIdx]->UserTreeIndices,
[NodeIdx](const EdgeInfo &EI) {
return EI.UserTE->Idx >
static_cast<int>(NodeIdx);
})))
return;
// If the first value node for store/insertelement is sext/zext/trunc, skip
// it and resize to the final type.
bool IsTruncRoot = false;
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
SmallVector<unsigned> RootDemotes;
if (NodeIdx != 0 &&
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
IsTruncRoot = true;
RootDemotes.push_back(NodeIdx);
IsProfitableToDemoteRoot = true;
++NodeIdx;
}
// The reduction was already analyzed and is not profitable - exit.
if (AnalyzedMinBWVals.contains(VectorizableTree[NodeIdx]->Scalars.front()))
return;
SmallVector<unsigned> ToDemote;
auto ComputeMaxBitWidth = [&](const TreeEntry &E, bool IsTopRoot,
bool IsProfitableToDemoteRoot, unsigned Opcode,
unsigned Limit, bool IsTruncRoot,
bool IsSignedCmp) -> unsigned {
ToDemote.clear();
// If the root is a trunc and the next node is a gather/buildvector, keep the
// trunc in scalars, which is free in most cases.
if (E.isGather() && IsTruncRoot && E.UserTreeIndices.size() == 1 &&
E.Idx > (IsStoreOrInsertElt ? 2 : 1) &&
all_of(E.Scalars, [&](Value *V) {
return V->hasOneUse() || isa<Constant>(V) ||
(!V->hasNUsesOrMore(UsesLimit) &&
none_of(V->users(), [&](User *U) {
const TreeEntry *TE = getTreeEntry(U);
const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
if (TE == UserTE || !TE)
return false;
+ if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+ SelectInst>(U) ||
+ !isa<CastInst, BinaryOperator, FreezeInst, PHINode,
+ SelectInst>(UserTE->getMainOp()))
+ return true;
unsigned UserTESz = DL->getTypeSizeInBits(
UserTE->Scalars.front()->getType());
auto It = MinBWs.find(TE);
if (It != MinBWs.end() && It->second.first > UserTESz)
return true;
return DL->getTypeSizeInBits(U->getType()) > UserTESz;
}));
})) {
ToDemote.push_back(E.Idx);
const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
auto It = MinBWs.find(UserTE);
if (It != MinBWs.end())
return It->second.first;
unsigned MaxBitWidth =
DL->getTypeSizeInBits(UserTE->Scalars.front()->getType());
MaxBitWidth = bit_ceil(MaxBitWidth);
if (MaxBitWidth < 8 && MaxBitWidth > 1)
MaxBitWidth = 8;
return MaxBitWidth;
}
unsigned VF = E.getVectorFactor();
auto *TreeRootIT = dyn_cast<IntegerType>(E.Scalars.front()->getType());
if (!TreeRootIT || !Opcode)
return 0u;
if (any_of(E.Scalars,
[&](Value *V) { return AnalyzedMinBWVals.contains(V); }))
return 0u;
unsigned NumParts = TTI->getNumberOfParts(getWidenedType(TreeRootIT, VF));
// The maximum bit width required to represent all the values that can be
// demoted without loss of precision. It would be safe to truncate the roots
// of the expression to this width.
unsigned MaxBitWidth = 1u;
// True if the roots can be zero-extended back to their original type,
// rather than sign-extended. We know that if the leading bits are not
// demanded, we can safely zero-extend. So we initialize IsKnownPositive to
// True.
// Determine if the sign bit of all the roots is known to be zero. If not,
// IsKnownPositive is set to False.
bool IsKnownPositive = !IsSignedCmp && all_of(E.Scalars, [&](Value *R) {
KnownBits Known = computeKnownBits(R, *DL);
return Known.isNonNegative();
});
// We first check if all the bits of the roots are demanded. If they're not,
// we can truncate the roots to this narrower type.
for (Value *Root : E.Scalars) {
unsigned NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT);
TypeSize NumTypeBits = DL->getTypeSizeInBits(Root->getType());
unsigned BitWidth1 = NumTypeBits - NumSignBits;
// If we can't prove that the sign bit is zero, we must add one to the
// maximum bit width to account for the unknown sign bit. This preserves
// the existing sign bit so we can safely sign-extend the root back to the
// original type. Otherwise, if we know the sign bit is zero, we will
// zero-extend the root instead.
//
// FIXME: This is somewhat suboptimal, as there will be cases where adding
// one to the maximum bit width will yield a larger-than-necessary
// type. In general, we need to add an extra bit only if we can't
// prove that the upper bit of the original type is equal to the
// upper bit of the proposed smaller type. If these two bits are
// the same (either zero or one) we know that sign-extending from
// the smaller type will result in the same value. Here, since we
// can't yet prove this, we are just making the proposed smaller
// type larger to ensure correctness.
if (!IsKnownPositive)
++BitWidth1;
APInt Mask = DB->getDemandedBits(cast<Instruction>(Root));
unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero();
MaxBitWidth =
std::max<unsigned>(std::min(BitWidth1, BitWidth2), MaxBitWidth);
}
if (MaxBitWidth < 8 && MaxBitWidth > 1)
MaxBitWidth = 8;
// If the original type is large but the reduced type does not improve
// register usage - ignore it.
if (NumParts > 1 &&
NumParts ==
TTI->getNumberOfParts(getWidenedType(
IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF)))
return 0u;
bool IsProfitableToDemote = Opcode == Instruction::Trunc ||
Opcode == Instruction::SExt ||
Opcode == Instruction::ZExt || NumParts > 1;
// Conservatively determine if we can actually truncate the roots of the
// expression. Collect the values that can be demoted in ToDemote and
// additional roots that require investigating in Roots.
DenseSet<const TreeEntry *> Visited;
unsigned MaxDepthLevel = IsTruncRoot ? Limit : 1;
bool NeedToDemote = IsProfitableToDemote;
if (!collectValuesToDemote(E, IsProfitableToDemoteRoot, MaxBitWidth,
ToDemote, Visited, MaxDepthLevel, NeedToDemote,
IsTruncRoot) ||
(MaxDepthLevel <= Limit &&
!(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
(!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) ||
DL->getTypeSizeInBits(TreeRootIT) /
DL->getTypeSizeInBits(cast<Instruction>(E.Scalars.front())
->getOperand(0)
->getType()) >
2)))))
return 0u;
// Round MaxBitWidth up to the next power-of-two.
MaxBitWidth = bit_ceil(MaxBitWidth);
return MaxBitWidth;
};
// If we can truncate the root, we must collect additional values that might
// be demoted as a result. That is, those seeded by truncations we will
// modify.
// Add reduction ops sizes, if any.
if (UserIgnoreList &&
isa<IntegerType>(VectorizableTree.front()->Scalars.front()->getType())) {
for (Value *V : *UserIgnoreList) {
auto NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT);
auto NumTypeBits = DL->getTypeSizeInBits(V->getType());
unsigned BitWidth1 = NumTypeBits - NumSignBits;
if (!isKnownNonNegative(V, SimplifyQuery(*DL)))
++BitWidth1;
unsigned BitWidth2 = BitWidth1;
if (!RecurrenceDescriptor::isIntMinMaxRecurrenceKind(::getRdxKind(V))) {
auto Mask = DB->getDemandedBits(cast<Instruction>(V));
BitWidth2 = Mask.getBitWidth() - Mask.countl_zero();
}
ReductionBitWidth =
std::max(std::min(BitWidth1, BitWidth2), ReductionBitWidth);
}
if (ReductionBitWidth < 8 && ReductionBitWidth > 1)
ReductionBitWidth = 8;
ReductionBitWidth = bit_ceil(ReductionBitWidth);
}
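// A worked instance of the computation above (assumed numbers): for a
// reduced i32 value with 25 known sign bits that may still be negative,
// BitWidth1 = 32 - 25 + 1 = 8; if DemandedBits reports only the low 6 bits
// as demanded (and the reduction is not an integer min/max), BitWidth2 = 6.
// The contribution is min(8, 6) = 6, which the clamping above raises to 8,
// and bit_ceil() keeps at 8.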
bool IsTopRoot = NodeIdx == 0;
while (NodeIdx < VectorizableTree.size() &&
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
RootDemotes.push_back(NodeIdx);
++NodeIdx;
IsTruncRoot = true;
}
bool IsSignedCmp = false;
while (NodeIdx < VectorizableTree.size()) {
ArrayRef<Value *> TreeRoot = VectorizableTree[NodeIdx]->Scalars;
unsigned Limit = 2;
unsigned Opcode = VectorizableTree[NodeIdx]->getOpcode();
if (IsTopRoot &&
ReductionBitWidth ==
DL->getTypeSizeInBits(
VectorizableTree.front()->Scalars.front()->getType()))
Limit = 3;
unsigned MaxBitWidth = ComputeMaxBitWidth(
*VectorizableTree[NodeIdx], IsTopRoot, IsProfitableToDemoteRoot, Opcode,
Limit, IsTruncRoot, IsSignedCmp);
if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) {
if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth)
ReductionBitWidth = bit_ceil(MaxBitWidth);
else if (MaxBitWidth == 0)
ReductionBitWidth = 0;
}
for (unsigned Idx : RootDemotes) {
if (all_of(VectorizableTree[Idx]->Scalars, [&](Value *V) {
uint32_t OrigBitWidth = DL->getTypeSizeInBits(V->getType());
if (OrigBitWidth > MaxBitWidth) {
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, MaxBitWidth);
return MaskedValueIsZero(V, Mask, SimplifyQuery(*DL));
}
return false;
}))
ToDemote.push_back(Idx);
}
RootDemotes.clear();
IsTopRoot = false;
IsProfitableToDemoteRoot = true;
if (ExtraBitWidthNodes.empty()) {
NodeIdx = VectorizableTree.size();
} else {
unsigned NewIdx = 0;
do {
NewIdx = *ExtraBitWidthNodes.begin();
ExtraBitWidthNodes.erase(ExtraBitWidthNodes.begin());
} while (NewIdx <= NodeIdx && !ExtraBitWidthNodes.empty());
NodeIdx = NewIdx;
IsTruncRoot =
NodeIdx < VectorizableTree.size() &&
any_of(VectorizableTree[NodeIdx]->UserTreeIndices,
[](const EdgeInfo &EI) {
return EI.EdgeIdx == 0 &&
EI.UserTE->getOpcode() == Instruction::Trunc &&
!EI.UserTE->isAltShuffle();
});
IsSignedCmp =
NodeIdx < VectorizableTree.size() &&
any_of(VectorizableTree[NodeIdx]->UserTreeIndices,
[&](const EdgeInfo &EI) {
return EI.UserTE->getOpcode() == Instruction::ICmp &&
any_of(EI.UserTE->Scalars, [&](Value *V) {
auto *IC = dyn_cast<ICmpInst>(V);
return IC &&
(IC->isSigned() ||
!isKnownNonNegative(IC->getOperand(0),
SimplifyQuery(*DL)) ||
!isKnownNonNegative(IC->getOperand(1),
SimplifyQuery(*DL)));
});
});
}
// If the maximum bit width we compute is less than the width of the roots'
// type, we can proceed with the narrowing. Otherwise, do nothing.
if (MaxBitWidth == 0 ||
MaxBitWidth >=
cast<IntegerType>(TreeRoot.front()->getType())->getBitWidth()) {
if (UserIgnoreList)
AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end());
continue;
}
// Finally, map the values we can demote to the maximum bit width we
// computed.
for (unsigned Idx : ToDemote) {
TreeEntry *TE = VectorizableTree[Idx].get();
if (MinBWs.contains(TE))
continue;
bool IsSigned = any_of(TE->Scalars, [&](Value *R) {
return !isKnownNonNegative(R, SimplifyQuery(*DL));
});
MinBWs.try_emplace(TE, MaxBitWidth, IsSigned);
}
}
}
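// End-to-end illustration of the analysis (assumed IR, not from this
// change): for a store-seeded tree like
//   %z0 = zext i8 %x0 to i32
//   %a0 = add i32 %z0, 1
//   %t0 = trunc i32 %a0 to i8
//   store i8 %t0, ptr %p0
// (and similarly for the other lanes), MinBWs records that the add nodes can
// be executed on i8 lanes, so the vectorized code works on <N x i8> instead
// of widening to <N x i32> and truncating back.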
PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
auto *LI = &AM.getResult<LoopAnalysis>(F);
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
auto *DB = &AM.getResult<DemandedBitsAnalysis>(F);
auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
return PA;
}
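// Usage sketch (illustrative, not part of this change): the pass normally
// runs as part of the default optimization pipelines, but can be invoked
// directly with the new pass manager, e.g.:
//   opt -passes=slp-vectorizer -S input.ll
// or programmatically via FPM.addPass(SLPVectorizerPass()); provided the
// analyses requested in run() are registered with the analysis managers.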
bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
TargetTransformInfo *TTI_,
TargetLibraryInfo *TLI_, AAResults *AA_,
LoopInfo *LI_, DominatorTree *DT_,
AssumptionCache *AC_, DemandedBits *DB_,
OptimizationRemarkEmitter *ORE_) {
if (!RunSLPVectorization)
return false;
SE = SE_;
TTI = TTI_;
TLI = TLI_;
AA = AA_;
LI = LI_;
DT = DT_;
AC = AC_;
DB = DB_;
DL = &F.getDataLayout();
Stores.clear();
GEPs.clear();
bool Changed = false;
// If the target claims to have no vector registers don't attempt
// vectorization.
if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true))) {
LLVM_DEBUG(
dbgs() << "SLP: Didn't find any vector registers for target, abort.\n");
return false;
}
// Don't vectorize when the attribute NoImplicitFloat is used.
if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
LLVM_DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
// Use the bottom up slp vectorizer to construct chains that start with
// store instructions.
BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL, ORE_);
// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
// delete instructions.
// Update DFS numbers now so that we can use them for ordering.
DT->updateDFSNumbers();
// Scan the blocks in the function in post order.
for (auto *BB : post_order(&F.getEntryBlock())) {
// Start new block - clear the list of reduction roots.
R.clearReductionData();
collectSeedInstructions(BB);
// Vectorize trees that end at stores.
if (!Stores.empty()) {
LLVM_DEBUG(dbgs() << "SLP: Found stores for " << Stores.size()
<< " underlying objects.\n");
Changed |= vectorizeStoreChains(R);
}
// Vectorize trees that end at reductions.
Changed |= vectorizeChainsInBlock(BB, R);
// Vectorize the index computations of getelementptr instructions. This
// is primarily intended to catch gather-like idioms ending at
// non-consecutive loads.
if (!GEPs.empty()) {
LLVM_DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size()
<< " underlying objects.\n");
Changed |= vectorizeGEPIndices(BB, R);
}
}
if (Changed) {
R.optimizeGatherSequence();
LLVM_DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
}
return Changed;
}
std::optional<bool>
SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
unsigned Idx, unsigned MinVF,
unsigned &Size) {
Size = 0;
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << Chain.size()
<< "\n");
const unsigned Sz = R.getVectorElementSize(Chain[0]);
unsigned VF = Chain.size();
if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF) {
// Check if vectorizing with a non-power-of-2 VF should be considered. At
// the moment, only consider cases where VF + 1 is a power-of-2, i.e. almost
// all vector lanes are used.
if (!VectorizeNonPowerOf2 || (VF < MinVF && VF + 1 != MinVF))
return false;
}
LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx
<< "\n");
SetVector<Value *> ValOps;
for (Value *V : Chain)
ValOps.insert(cast<StoreInst>(V)->getValueOperand());
// If the operands are not the same/alternate opcodes or the number of unique
// values is not a power of 2 - exit.
InstructionsState S = getSameOpcode(ValOps.getArrayRef(), *TLI);
if (all_of(ValOps, IsaPred<Instruction>) && ValOps.size() > 1) {
DenseSet<Value *> Stores(Chain.begin(), Chain.end());
bool IsPowerOf2 =
isPowerOf2_32(ValOps.size()) ||
(VectorizeNonPowerOf2 && isPowerOf2_32(ValOps.size() + 1));
if ((!IsPowerOf2 && S.getOpcode() && S.getOpcode() != Instruction::Load &&
(!S.MainOp->isSafeToRemove() ||
any_of(ValOps.getArrayRef(),
[&](Value *V) {
return !isa<ExtractElementInst>(V) &&
(V->getNumUses() > Chain.size() ||
any_of(V->users(), [&](User *U) {
return !Stores.contains(U);
}));
}))) ||
(ValOps.size() > Chain.size() / 2 && !S.getOpcode())) {
Size = (!IsPowerOf2 && S.getOpcode()) ? 1 : 2;
return false;
}
}
if (R.isLoadCombineCandidate(Chain))
return true;
R.buildTree(Chain);
// Check if the tree is tiny and the store itself or its value is not
// vectorized.
if (R.isTreeTinyAndNotFullyVectorizable()) {
if (R.isGathered(Chain.front()) ||
R.isNotScheduled(cast<StoreInst>(Chain.front())->getValueOperand()))
return std::nullopt;
Size = R.getTreeSize();
return false;
}
R.reorderTopToBottom();
R.reorderBottomToTop();
R.buildExternalUses();
R.computeMinimumValueSizes();
R.transformNodes();
Size = R.getTreeSize();
if (S.getOpcode() == Instruction::Load)
Size = 2; // cut off masked gather small trees
InstructionCost Cost = R.getTreeCost();
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF=" << VF << "\n");
if (Cost < -SLPCostThreshold) {
LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost = " << Cost << "\n");
using namespace ore;
R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized",
cast<StoreInst>(Chain[0]))
<< "Stores SLP vectorized with cost " << NV("Cost", Cost)
<< " and with tree size "
<< NV("TreeSize", R.getTreeSize()));
R.vectorizeTree();
return true;
}
return false;
}
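// Note on the profitability test above (illustrative): getTreeCost() returns
// the estimated cost of the vector form minus the scalar form, so negative
// values mean the vectorized chain is cheaper. With the default
// -slp-threshold of 0, any Cost < 0 vectorizes; a larger threshold demands
// correspondingly larger savings.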
/// Checks that the quadratic mean (RMS) deviation of the non-unit tree sizes
/// is small relative to the mean size: Dev * 81 / Mean^2 must truncate to
/// zero, i.e. the RMS deviation is below one ninth of the mean.
static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
bool First) {
unsigned Num = 0;
uint64_t Sum = std::accumulate(
Sizes.begin(), Sizes.end(), static_cast<uint64_t>(0),
[&](uint64_t V, const std::pair<unsigned, unsigned> &Val) {
unsigned Size = First ? Val.first : Val.second;
if (Size == 1)
return V;
++Num;
return V + Size;
});
if (Num == 0)
return true;
uint64_t Mean = Sum / Num;
if (Mean == 0)
return true;
uint64_t Dev = std::accumulate(
Sizes.begin(), Sizes.end(), static_cast<uint64_t>(0),
[&](uint64_t V, const std::pair<unsigned, unsigned> &Val) {
unsigned P = First ? Val.first : Val.second;
if (P == 1)
return V;
return V + (P - Mean) * (P - Mean);
}) /
Num;
return Dev * 81 / (Mean * Mean) == 0;
}
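// Worked example (illustrative): for sizes {4, 4, 1, 4} the unit entry is
// skipped, so Num = 3, Mean = 4, Dev = 0, and the function returns true.
// For sizes {2, 8}: Mean = 5, Dev = ((2-5)^2 + (8-5)^2) / 2 = 9, and
// Dev * 81 / (Mean * Mean) = 729 / 25 = 29 != 0, so it returns false.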
bool SLPVectorizerPass::vectorizeStores(
ArrayRef<StoreInst *> Stores, BoUpSLP &R,
DenseSet<std::tuple<Value *, Value *, Value *, Value *, unsigned>>
&Visited) {
// We may run into multiple chains that merge into a single chain. We mark the
// stores that we vectorized so that we don't visit the same store twice.
BoUpSLP::ValueSet VectorizedStores;
bool Changed = false;
struct StoreDistCompare {
bool operator()(const std::pair<unsigned, int> &Op1,
const std::pair<unsigned, int> &Op2) const {
return Op1.second < Op2.second;
}
};
// A set of pairs (index of store in Stores array ref, Distance of the store
// address relative to base store address in units).
using StoreIndexToDistSet =
std::set<std::pair<unsigned, int>, StoreDistCompare>;
auto TryToVectorize = [&](const StoreIndexToDistSet &Set) {
int PrevDist = -1;
BoUpSLP::ValueList Operands;
// Collect the chain into a list.
for (auto [Idx, Data] : enumerate(Set)) {
if (Operands.empty() || Data.second - PrevDist == 1) {
Operands.push_back(Stores[Data.first]);
PrevDist = Data.second;
if (Idx != Set.size() - 1)
continue;
}
auto E = make_scope_exit([&, &DataVar = Data]() {
Operands.clear();
Operands.push_back(Stores[DataVar.first]);
PrevDist = DataVar.second;
});
if (Operands.size() <= 1 ||
!Visited
.insert({Operands.front(),
cast<StoreInst>(Operands.front())->getValueOperand(),
Operands.back(),
cast<StoreInst>(Operands.back())->getValueOperand(),
Operands.size()})
.second)
continue;
unsigned MaxVecRegSize = R.getMaxVecRegSize();
unsigned EltSize = R.getVectorElementSize(Operands[0]);
unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
unsigned MaxVF =
std::min(R.getMaximumVF(EltSize, Instruction::Store), MaxElts);
unsigned MaxRegVF = MaxVF;
auto *Store = cast<StoreInst>(Operands[0]);
Type *StoreTy = Store->getValueOperand()->getType();
Type *ValueTy = StoreTy;
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
ValueTy = Trunc->getSrcTy();
if (ValueTy == StoreTy &&
R.getVectorElementSize(Store->getValueOperand()) <= EltSize)
MaxVF = std::min<unsigned>(MaxVF, bit_floor(Operands.size()));
unsigned MinVF = std::max<unsigned>(
2, PowerOf2Ceil(TTI->getStoreMinimumVF(
R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy,
ValueTy)));
if (MaxVF < MinVF) {
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
<< ") < "
<< "MinVF (" << MinVF << ")\n");
continue;
}
unsigned NonPowerOf2VF = 0;
if (VectorizeNonPowerOf2) {
// First try vectorizing with a non-power-of-2 VF. At the moment, only
// consider cases where VF + 1 is a power-of-2, i.e. almost all vector
// lanes are used.
unsigned CandVF = Operands.size();
if (isPowerOf2_32(CandVF + 1) && CandVF <= MaxRegVF)
NonPowerOf2VF = CandVF;
}
unsigned Sz = 1 + Log2_32(MaxVF) - Log2_32(MinVF);
SmallVector<unsigned> CandidateVFs(Sz + (NonPowerOf2VF > 0 ? 1 : 0));
unsigned Size = MinVF;
for_each(reverse(CandidateVFs), [&](unsigned &VF) {
VF = Size > MaxVF ? NonPowerOf2VF : Size;
Size *= 2;
});
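// E.g. (illustrative), with MinVF = 2, MaxVF = 16 and NonPowerOf2VF = 7 this
// produces CandidateVFs = {7, 16, 8, 4, 2}: the non-power-of-2 candidate
// first, followed by the power-of-2 VFs in decreasing order.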
unsigned End = Operands.size();
unsigned Repeat = 0;
constexpr unsigned MaxAttempts = 4;
OwningArrayRef<std::pair<unsigned, unsigned>> RangeSizes(Operands.size());
for_each(RangeSizes, [](std::pair<unsigned, unsigned> &P) {
P.first = P.second = 1;
});
DenseMap<Value *, std::pair<unsigned, unsigned>> NonSchedulable;
auto IsNotVectorized = [](bool First,
const std::pair<unsigned, unsigned> &P) {
return First ? P.first > 0 : P.second > 0;
};
auto IsVectorized = [](bool First,
const std::pair<unsigned, unsigned> &P) {
return First ? P.first == 0 : P.second == 0;
};
auto VFIsProfitable = [](bool First, unsigned Size,
const std::pair<unsigned, unsigned> &P) {
return First ? Size >= P.first : Size >= P.second;
};
auto FirstSizeSame = [](unsigned Size,
const std::pair<unsigned, unsigned> &P) {
return Size == P.first;
};
while (true) {
++Repeat;
bool RepeatChanged = false;
bool AnyProfitableGraph = false;
for (unsigned Size : CandidateVFs) {
AnyProfitableGraph = false;
unsigned StartIdx = std::distance(
RangeSizes.begin(),
find_if(RangeSizes, std::bind(IsNotVectorized, Size >= MaxRegVF,
std::placeholders::_1)));
while (StartIdx < End) {
unsigned EndIdx =
std::distance(RangeSizes.begin(),
find_if(RangeSizes.drop_front(StartIdx),
std::bind(IsVectorized, Size >= MaxRegVF,
std::placeholders::_1)));
unsigned Sz = EndIdx >= End ? End : EndIdx;
for (unsigned Cnt = StartIdx; Cnt + Size <= Sz;) {
if (!checkTreeSizes(RangeSizes.slice(Cnt, Size),
Size >= MaxRegVF)) {
++Cnt;
continue;
}
ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
assert(all_of(Slice,
[&](Value *V) {
return cast<StoreInst>(V)
->getValueOperand()
->getType() ==
cast<StoreInst>(Slice.front())
->getValueOperand()
->getType();
}) &&
"Expected all operands of same type.");
if (!NonSchedulable.empty()) {
auto [NonSchedSizeMax, NonSchedSizeMin] =
NonSchedulable.lookup(Slice.front());
if (NonSchedSizeMax > 0 && NonSchedSizeMin <= Size) {
Cnt += NonSchedSizeMax;
continue;
}
}
unsigned TreeSize;
std::optional<bool> Res =
vectorizeStoreChain(Slice, R, Cnt, MinVF, TreeSize);
if (!Res) {
NonSchedulable
.try_emplace(Slice.front(), std::make_pair(Size, Size))
.first->getSecond()
.second = Size;
} else if (*Res) {
// Mark the vectorized stores so that we don't vectorize them
// again.
VectorizedStores.insert(Slice.begin(), Slice.end());
AnyProfitableGraph = RepeatChanged = Changed = true;
// If we vectorized initial block, no need to try to vectorize
// it again.
for_each(RangeSizes.slice(Cnt, Size),
[](std::pair<unsigned, unsigned> &P) {
P.first = P.second = 0;
});
if (Cnt < StartIdx + MinVF) {
for_each(RangeSizes.slice(StartIdx, Cnt - StartIdx),
[](std::pair<unsigned, unsigned> &P) {
P.first = P.second = 0;
});
StartIdx = Cnt + Size;
}
if (Cnt > Sz - Size - MinVF) {
for_each(RangeSizes.slice(Cnt + Size, Sz - (Cnt + Size)),
[](std::pair<unsigned, unsigned> &P) {
P.first = P.second = 0;
});
if (Sz == End)
End = Cnt;
Sz = Cnt;
}
Cnt += Size;
continue;
}
if (Size > 2 && Res &&
!all_of(RangeSizes.slice(Cnt, Size),
std::bind(VFIsProfitable, Size >= MaxRegVF, TreeSize,
std::placeholders::_1))) {
Cnt += Size;
continue;
}
// For very big VFs, check that we are not rebuilding the same trees, just
// with a larger number of elements.
if (Size > MaxRegVF && TreeSize > 1 &&
all_of(RangeSizes.slice(Cnt, Size),
std::bind(FirstSizeSame, TreeSize,
std::placeholders::_1))) {
Cnt += Size;
while (Cnt != Sz && RangeSizes[Cnt].first == TreeSize)
++Cnt;
continue;
}
if (TreeSize > 1)
for_each(RangeSizes.slice(Cnt, Size),
[&](std::pair<unsigned, unsigned> &P) {
if (Size >= MaxRegVF)
P.second = std::max(P.second, TreeSize);
else
P.first = std::max(P.first, TreeSize);
});
++Cnt;
AnyProfitableGraph = true;
}
if (StartIdx >= End)
break;
if (Sz - StartIdx < Size && Sz - StartIdx >= MinVF)
AnyProfitableGraph = true;
StartIdx = std::distance(
RangeSizes.begin(),
find_if(RangeSizes.drop_front(Sz),
std::bind(IsNotVectorized, Size >= MaxRegVF,
std::placeholders::_1)));
}
if (!AnyProfitableGraph && Size >= MaxRegVF)
break;
}
// All values vectorized - exit.
if (all_of(RangeSizes, [](const std::pair<unsigned, unsigned> &P) {
return P.first == 0 && P.second == 0;
}))
break;
// Check if we tried all attempts or there is no need for further attempts.
if (Repeat >= MaxAttempts ||
(Repeat > 1 && (RepeatChanged || !AnyProfitableGraph)))
break;
constexpr unsigned StoresLimit = 64;
const unsigned MaxTotalNum = bit_floor(std::min<unsigned>(
Operands.size(),
static_cast<unsigned>(
End -
std::distance(
RangeSizes.begin(),
find_if(RangeSizes, std::bind(IsNotVectorized, true,
std::placeholders::_1))) +
1)));
unsigned VF = PowerOf2Ceil(CandidateVFs.front()) * 2;
if (VF > MaxTotalNum || VF >= StoresLimit)
break;
for_each(RangeSizes, [&](std::pair<unsigned, unsigned> &P) {
if (P.first != 0)
P.first = std::max(P.second, P.first);
});
// Make a last attempt to vectorize the maximum number of elements, if all
// previous attempts were unsuccessful because of cost.
CandidateVFs.clear();
CandidateVFs.push_back(VF);
}
}
};
// Stores pairs (first: index of the store in the Stores array ref whose
// address is taken as the base; second: sorted set of pairs {index, dist},
// which are the indices of stores in the set and their distances relative to
// the base address).
// The index of the very first store is kept separately, since the set may be
// reordered after insertion and the first store may be moved. This container
// helps to reduce the number of calls to getPointersDiff().
SmallVector<std::pair<unsigned, StoreIndexToDistSet>> SortedStores;
// Inserts the specified store SI with the given index Idx into the set of
// stores. If a store with the same distance was already found - stop the
// insertion and try to vectorize the stores found so far. If some stores from
// this sequence were not vectorized - try to vectorize them together with the
// new store later. This logic is applied only to the stores that come before
// the previous store with the same distance.
// Example:
// 1. store x, %p
// 2. store y, %p+1
// 3. store z, %p+2
// 4. store a, %p
// 5. store b, %p+3
// - Scan this from the last to first store. The very first bunch of stores is
// {5, {{4, -3}, {2, -2}, {3, -1}, {5, 0}}} (the element in SortedStores
// vector).
// - The next store in the list - #1 - has the same distance from store #5 as
// the store #4.
// - Try to vectorize sequence of stores 4,2,3,5.
// - If all these stores are vectorized - just drop them.
// - If some of them are not vectorized (say, #3 and #5), do extra analysis.
// - Start new stores sequence.
// The new bunch of stores is {1, {1, 0}}.
// - Add the stores from previous sequence, that were not vectorized.
// Here we consider the stores in reversed order, rather than the order in
// which they are used in the IR (Stores are already reversed, see the
// vectorizeStoreChains() function).
// Store #3 can be added -> comes after store #4 with the same distance as
// store #1.
// Store #5 cannot be added - comes before store #4.
// This logic improves compile time: we assume that the stores after a
// previous store with the same distance most likely have memory dependencies,
// so there is no need to waste compile time trying to vectorize them.
// - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
for (std::pair<unsigned, StoreIndexToDistSet> &Set : SortedStores) {
std::optional<int> Diff = getPointersDiff(
Stores[Set.first]->getValueOperand()->getType(),
Stores[Set.first]->getPointerOperand(),
SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
/*StrictCheck=*/true);
if (!Diff)
continue;
auto It = Set.second.find(std::make_pair(Idx, *Diff));
if (It == Set.second.end()) {
Set.second.emplace(Idx, *Diff);
return;
}
// Try to vectorize the first found set to avoid duplicate analysis.
TryToVectorize(Set.second);
StoreIndexToDistSet PrevSet;
PrevSet.swap(Set.second);
Set.first = Idx;
Set.second.emplace(Idx, 0);
// Insert stores that followed previous match to try to vectorize them
// with this store.
unsigned StartIdx = It->first + 1;
SmallBitVector UsedStores(Idx - StartIdx);
// Distances to previously found dup store (or this store, since they
// store to the same addresses).
SmallVector<int> Dists(Idx - StartIdx, 0);
for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) {
// Do not try to vectorize sequences we have already tried.
if (Pair.first <= It->first ||
VectorizedStores.contains(Stores[Pair.first]))
break;
unsigned BI = Pair.first - StartIdx;
UsedStores.set(BI);
Dists[BI] = Pair.second - It->second;
}
for (unsigned I = StartIdx; I < Idx; ++I) {
unsigned BI = I - StartIdx;
if (UsedStores.test(BI))
Set.second.emplace(I, Dists[BI]);
}
return;
}
auto &Res = SortedStores.emplace_back();
Res.first = Idx;
Res.second.emplace(Idx, 0);
};
Type *PrevValTy = nullptr;
for (auto [I, SI] : enumerate(Stores)) {
if (R.isDeleted(SI))
continue;
if (!PrevValTy)
PrevValTy = SI->getValueOperand()->getType();
// Check that we do not try to vectorize stores of different types.
if (PrevValTy != SI->getValueOperand()->getType()) {
for (auto &Set : SortedStores)
TryToVectorize(Set.second);
SortedStores.clear();
PrevValTy = SI->getValueOperand()->getType();
}
FillStoresSet(I, SI);
}
// Final vectorization attempt.
for (auto &Set : SortedStores)
TryToVectorize(Set.second);
return Changed;
}
void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
// Initialize the collections. We will make a single pass over the block.
Stores.clear();
GEPs.clear();
// Visit the store and getelementptr instructions in BB and organize them in
// Stores and GEPs according to the underlying objects of their pointer
// operands.
for (Instruction &I : *BB) {
// Ignore store instructions that are volatile or have a pointer operand
// that doesn't point to a scalar type.
if (auto *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isSimple())
continue;
if (!isValidElementType(SI->getValueOperand()->getType()))
continue;
Stores[getUnderlyingObject(SI->getPointerOperand())].push_back(SI);
}
// Ignore getelementptr instructions that have more than one index, a
// constant index, or a pointer operand that doesn't point to a scalar
// type.
else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
if (GEP->getNumIndices() != 1)
continue;
Value *Idx = GEP->idx_begin()->get();
if (isa<Constant>(Idx))
continue;
if (!isValidElementType(Idx->getType()))
continue;
if (GEP->getType()->isVectorTy())
continue;
GEPs[GEP->getPointerOperand()].push_back(GEP);
}
}
}
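// Illustrative seeds (assumed IR): in a block containing
//   store i32 %v0, ptr %p0
//   store i32 %v1, ptr %p1
//   %g = getelementptr i32, ptr %base, i64 %i
// both simple stores are bucketed in Stores by the underlying object of
// their pointer operand, and %g lands in GEPs under %base because it has a
// single, non-constant, scalar index.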
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
bool MaxVFOnly) {
if (VL.size() < 2)
return false;
LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
<< VL.size() << ".\n");
// Check that all of the parts are instructions of the same type;
// we permit an alternate opcode via InstructionsState.
InstructionsState S = getSameOpcode(VL, *TLI);
if (!S.getOpcode())
return false;
Instruction *I0 = cast<Instruction>(S.OpValue);
// Make sure invalid types (including vector type) are rejected before
// determining vectorization factor for scalar instructions.
for (Value *V : VL) {
Type *Ty = V->getType();
if (!isa<InsertElementInst>(V) && !isValidElementType(Ty)) {
// NOTE: the following will give the user an internal llvm type name, which
// may not be useful.
R.getORE()->emit([&]() {
std::string TypeStr;
llvm::raw_string_ostream rso(TypeStr);
Ty->print(rso);
return OptimizationRemarkMissed(SV_NAME, "UnsupportedType", I0)
<< "Cannot SLP vectorize list: type "
<< TypeStr + " is unsupported by vectorizer";
});
return false;
}
}
unsigned Sz = R.getVectorElementSize(I0);
unsigned MinVF = R.getMinVF(Sz);
unsigned MaxVF = std::max<unsigned>(llvm::bit_floor(VL.size()), MinVF);
MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
if (MaxVF < 2) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
<< "Cannot SLP vectorize list: vectorization factor "
<< "less than 2 is not supported";
});
return false;
}
bool Changed = false;
bool CandidateFound = false;
InstructionCost MinCost = SLPCostThreshold.getValue();
Type *ScalarTy = VL[0]->getType();
if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
ScalarTy = IE->getOperand(1)->getType();
unsigned NextInst = 0, MaxInst = VL.size();
for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
// No actual vectorization should happen if the number of parts is the same
// as the provided vectorization factor (i.e. the scalar type is used for
// vector code during codegen).
auto *VecTy = getWidenedType(ScalarTy, VF);
if (TTI->getNumberOfParts(VecTy) == VF)
continue;
for (unsigned I = NextInst; I < MaxInst; ++I) {
unsigned ActualVF = std::min(MaxInst - I, VF);
if (!isPowerOf2_32(ActualVF))
continue;
if (MaxVFOnly && ActualVF < MaxVF)
break;
if ((VF > MinVF && ActualVF <= VF / 2) || (VF == MinVF && ActualVF < 2))
break;
ArrayRef<Value *> Ops = VL.slice(I, ActualVF);
// Check that a previous iteration of this loop did not delete the Value.
if (llvm::any_of(Ops, [&R](Value *V) {
auto *I = dyn_cast<Instruction>(V);
return I && R.isDeleted(I);
}))
continue;
LLVM_DEBUG(dbgs() << "SLP: Analyzing " << ActualVF << " operations "
<< "\n");
R.buildTree(Ops);
if (R.isTreeTinyAndNotFullyVectorizable())
continue;
R.reorderTopToBottom();
R.reorderBottomToTop(
/*IgnoreReorder=*/!isa<InsertElementInst>(Ops.front()) &&
!R.doesRootHaveInTreeUses());
R.buildExternalUses();
R.computeMinimumValueSizes();
R.transformNodes();
InstructionCost Cost = R.getTreeCost();
CandidateFound = true;
MinCost = std::min(MinCost, Cost);
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost
<< " for VF=" << ActualVF << "\n");
if (Cost < -SLPCostThreshold) {
LLVM_DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList",
cast<Instruction>(Ops[0]))
<< "SLP vectorized with cost " << ore::NV("Cost", Cost)
<< " and with tree size "
<< ore::NV("TreeSize", R.getTreeSize()));
R.vectorizeTree();
// Move to the next bundle.
I += VF - 1;
NextInst = I + 1;
Changed = true;
}
}
}
if (!Changed && CandidateFound) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "NotBeneficial", I0)
<< "List vectorization was possible but not beneficial with cost "
<< ore::NV("Cost", MinCost) << " >= "
<< ore::NV("Treshold", -SLPCostThreshold);
});
} else if (!Changed) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "NotPossible", I0)
<< "Cannot SLP vectorize list: vectorization was impossible"
<< " with available vectorization factors";
});
}
return Changed;
}
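// Illustrative schedule (assumed sizes): for a list of 6 values with
// MinVF = 2 and MaxVF = 4, the loop above tries VF = 4 and then VF = 2,
// sliding a window of ActualVF consecutive values across the list and
// rebuilding the SLP tree for each candidate slice until one is profitable.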
bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
if (!I)
return false;
if (!isa<BinaryOperator, CmpInst>(I) || isa<VectorType>(I->getType()))
return false;
Value *P = I->getParent();
// Vectorize in current basic block only.
auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
return false;
// First collect all possible candidates
SmallVector<std::pair<Value *, Value *>, 4> Candidates;
Candidates.emplace_back(Op0, Op1);
auto *A = dyn_cast<BinaryOperator>(Op0);
auto *B = dyn_cast<BinaryOperator>(Op1);
// Try to skip B.
if (A && B && B->hasOneUse()) {
auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
if (B0 && B0->getParent() == P)
Candidates.emplace_back(A, B0);
if (B1 && B1->getParent() == P)
Candidates.emplace_back(A, B1);
}
// Try to skip A.
if (B && A && A->hasOneUse()) {
auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
if (A0 && A0->getParent() == P)
Candidates.emplace_back(A0, B);
if (A1 && A1->getParent() == P)
Candidates.emplace_back(A1, B);
}
if (Candidates.size() == 1)
return tryToVectorizeList({Op0, Op1}, R);
// We have multiple options. Try to pick the single best.
std::optional<int> BestCandidate = R.findBestRootPair(Candidates);
if (!BestCandidate)
return false;
return tryToVectorizeList(
{Candidates[*BestCandidate].first, Candidates[*BestCandidate].second}, R);
}
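// Sketch of the candidate search above (assumed IR): for
//   %a = add i32 %x, %y
//   %b = mul i32 %z, %w
//   %r = add i32 %a, %b
// the pair {%a, %b} is always a candidate; because %b has one use, the pairs
// {%a, def(%z)} and {%a, def(%w)} are also collected when those defs are
// binary operators in the same block, and findBestRootPair() selects the
// most promising pair to feed tryToVectorizeList().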
namespace {
/// Model horizontal reductions.
///
/// A horizontal reduction is a tree of reduction instructions that has values
/// that can be put into a vector as its leaves. For example:
///
/// mul mul mul mul
/// \ / \ /
/// + +
/// \ /
/// +
/// This tree has "mul" as its leaf values and "+" as its reduction
/// instructions. A reduction can feed into a store or a binary operation
/// feeding a phi.
/// ...
/// \ /
/// +
/// |
/// phi +=
///
/// Or:
/// ...
/// \ /
/// +
/// |
/// *p =
///
class HorizontalReduction {
using ReductionOpsType = SmallVector<Value *, 16>;
using ReductionOpsListType = SmallVector<ReductionOpsType, 2>;
ReductionOpsListType ReductionOps;
/// List of possibly reduced values.
SmallVector<SmallVector<Value *>> ReducedVals;
/// Maps reduced value to the corresponding reduction operation.
DenseMap<Value *, SmallVector<Instruction *>> ReducedValsToOps;
// Use map vector to make stable output.
MapVector<Instruction *, Value *> ExtraArgs;
WeakTrackingVH ReductionRoot;
/// The type of reduction operation.
RecurKind RdxKind;
/// Checks if the optimization of original scalar identity operations on
/// matched horizontal reductions is enabled and allowed.
bool IsSupportedHorRdxIdentityOp = false;
static bool isCmpSelMinMax(Instruction *I) {
return match(I, m_Select(m_Cmp(), m_Value(), m_Value())) &&
RecurrenceDescriptor::isMinMaxRecurrenceKind(getRdxKind(I));
}
// And/or are potentially poison-safe logical patterns like:
// select x, y, false
// select x, true, y
static bool isBoolLogicOp(Instruction *I) {
return isa<SelectInst>(I) &&
(match(I, m_LogicalAnd()) || match(I, m_LogicalOr()));
}
/// Checks if instruction is associative and can be vectorized.
static bool isVectorizable(RecurKind Kind, Instruction *I) {
if (Kind == RecurKind::None)
return false;
// Integer ops that map to select instructions or intrinsics are fine.
if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind) ||
isBoolLogicOp(I))
return true;
if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
// FP min/max are associative except for NaN and -0.0. We do not
// have to rule out -0.0 here because the intrinsic semantics do not
// specify a fixed result for it.
return I->getFastMathFlags().noNaNs();
}
if (Kind == RecurKind::FMaximum || Kind == RecurKind::FMinimum)
return true;
return I->isAssociative();
}
static Value *getRdxOperand(Instruction *I, unsigned Index) {
// Poison-safe 'or' takes the form: select X, true, Y
// To make that work with the normal operand processing, we skip the
// true value operand.
// TODO: Change the code and data structures to handle this without a hack.
if (getRdxKind(I) == RecurKind::Or && isa<SelectInst>(I) && Index == 1)
return I->getOperand(2);
return I->getOperand(Index);
}
/// Creates reduction operation with the current opcode.
static Value *createOp(IRBuilderBase &Builder, RecurKind Kind, Value *LHS,
Value *RHS, const Twine &Name, bool UseSelect) {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
switch (Kind) {
case RecurKind::Or:
if (UseSelect &&
LHS->getType() == CmpInst::makeCmpResultType(LHS->getType()))
return Builder.CreateSelect(LHS, Builder.getTrue(), RHS, Name);
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
case RecurKind::And:
if (UseSelect &&
LHS->getType() == CmpInst::makeCmpResultType(LHS->getType()))
return Builder.CreateSelect(LHS, RHS, Builder.getFalse(), Name);
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
case RecurKind::Add:
case RecurKind::Mul:
case RecurKind::Xor:
case RecurKind::FAdd:
case RecurKind::FMul:
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
case RecurKind::FMax:
return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS);
case RecurKind::FMin:
return Builder.CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS);
case RecurKind::FMaximum:
return Builder.CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS);
case RecurKind::FMinimum:
return Builder.CreateBinaryIntrinsic(Intrinsic::minimum, LHS, RHS);
case RecurKind::SMax:
if (UseSelect) {
Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::smax, LHS, RHS);
case RecurKind::SMin:
if (UseSelect) {
Value *Cmp = Builder.CreateICmpSLT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::smin, LHS, RHS);
case RecurKind::UMax:
if (UseSelect) {
Value *Cmp = Builder.CreateICmpUGT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::umax, LHS, RHS);
case RecurKind::UMin:
if (UseSelect) {
Value *Cmp = Builder.CreateICmpULT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::umin, LHS, RHS);
default:
llvm_unreachable("Unknown reduction operation.");
}
}
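// For instance (illustrative), RecurKind::SMax with UseSelect == true emits
// the cmp+select form:
//   %cmp = icmp sgt i32 %lhs, %rhs
//   %max = select i1 %cmp, i32 %lhs, i32 %rhs
// whereas UseSelect == false emits a single intrinsic call:
//   %max = call i32 @llvm.smax.i32(i32 %lhs, i32 %rhs)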
/// Creates reduction operation with the current opcode with the IR flags
/// from \p ReductionOps, dropping nuw/nsw flags.
static Value *createOp(IRBuilderBase &Builder, RecurKind RdxKind, Value *LHS,
Value *RHS, const Twine &Name,
const ReductionOpsListType &ReductionOps) {
bool UseSelect = ReductionOps.size() == 2 ||
// Logical or/and.
(ReductionOps.size() == 1 &&
any_of(ReductionOps.front(), IsaPred<SelectInst>));
assert((!UseSelect || ReductionOps.size() != 2 ||
isa<SelectInst>(ReductionOps[1][0])) &&
"Expected cmp + select pairs for reduction");
Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect);
if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
if (auto *Sel = dyn_cast<SelectInst>(Op)) {
propagateIRFlags(Sel->getCondition(), ReductionOps[0], nullptr,
/*IncludeWrapFlags=*/false);
propagateIRFlags(Op, ReductionOps[1], nullptr,
/*IncludeWrapFlags=*/false);
return Op;
}
}
propagateIRFlags(Op, ReductionOps[0], nullptr, /*IncludeWrapFlags=*/false);
return Op;
}
public:
static RecurKind getRdxKind(Value *V) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
return RecurKind::None;
if (match(I, m_Add(m_Value(), m_Value())))
return RecurKind::Add;
if (match(I, m_Mul(m_Value(), m_Value())))
return RecurKind::Mul;
if (match(I, m_And(m_Value(), m_Value())) ||
match(I, m_LogicalAnd(m_Value(), m_Value())))
return RecurKind::And;
if (match(I, m_Or(m_Value(), m_Value())) ||
match(I, m_LogicalOr(m_Value(), m_Value())))
return RecurKind::Or;
if (match(I, m_Xor(m_Value(), m_Value())))
return RecurKind::Xor;
if (match(I, m_FAdd(m_Value(), m_Value())))
return RecurKind::FAdd;
if (match(I, m_FMul(m_Value(), m_Value())))
return RecurKind::FMul;
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
return RecurKind::FMax;
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
return RecurKind::FMin;
if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
return RecurKind::FMaximum;
if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
return RecurKind::FMinimum;
// This matches either cmp+select or intrinsics. SLP is expected to handle
// either form.
// TODO: If we are canonicalizing to intrinsics, we can remove several
// special-case paths that deal with selects.
if (match(I, m_SMax(m_Value(), m_Value())))
return RecurKind::SMax;
if (match(I, m_SMin(m_Value(), m_Value())))
return RecurKind::SMin;
if (match(I, m_UMax(m_Value(), m_Value())))
return RecurKind::UMax;
if (match(I, m_UMin(m_Value(), m_Value())))
return RecurKind::UMin;
if (auto *Select = dyn_cast<SelectInst>(I)) {
// Try harder: look for min/max pattern based on instructions producing
// same values such as: select ((cmp Inst1, Inst2), Inst1, Inst2).
// During the intermediate stages of SLP, it's very common to have a
// pattern like this (since optimizeGatherSequence is run only once
// at the end):
// %1 = extractelement <2 x i32> %a, i32 0
// %2 = extractelement <2 x i32> %a, i32 1
// %cond = icmp sgt i32 %1, %2
// %3 = extractelement <2 x i32> %a, i32 0
// %4 = extractelement <2 x i32> %a, i32 1
// %select = select i1 %cond, i32 %3, i32 %4
CmpInst::Predicate Pred;
Instruction *L1;
Instruction *L2;
Value *LHS = Select->getTrueValue();
Value *RHS = Select->getFalseValue();
Value *Cond = Select->getCondition();
// TODO: Support inverse predicates.
if (match(Cond, m_Cmp(Pred, m_Specific(LHS), m_Instruction(L2)))) {
if (!isa<ExtractElementInst>(RHS) ||
!L2->isIdenticalTo(cast<Instruction>(RHS)))
return RecurKind::None;
} else if (match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Specific(RHS)))) {
if (!isa<ExtractElementInst>(LHS) ||
!L1->isIdenticalTo(cast<Instruction>(LHS)))
return RecurKind::None;
} else {
if (!isa<ExtractElementInst>(LHS) || !isa<ExtractElementInst>(RHS))
return RecurKind::None;
if (!match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2))) ||
!L1->isIdenticalTo(cast<Instruction>(LHS)) ||
!L2->isIdenticalTo(cast<Instruction>(RHS)))
return RecurKind::None;
}
switch (Pred) {
default:
return RecurKind::None;
case CmpInst::ICMP_SGT:
case CmpInst::ICMP_SGE:
return RecurKind::SMax;
case CmpInst::ICMP_SLT:
case CmpInst::ICMP_SLE:
return RecurKind::SMin;
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_UGE:
return RecurKind::UMax;
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_ULE:
return RecurKind::UMin;
}
}
return RecurKind::None;
}
/// Get the index of the first operand.
static unsigned getFirstOperandIndex(Instruction *I) {
return isCmpSelMinMax(I) ? 1 : 0;
}
private:
/// Total number of operands in the reduction operation.
static unsigned getNumberOfOperands(Instruction *I) {
return isCmpSelMinMax(I) ? 3 : 2;
}
/// Checks if the instruction is in basic block \p BB.
/// For a cmp+sel min/max reduction check that both ops are in \p BB.
static bool hasSameParent(Instruction *I, BasicBlock *BB) {
if (isCmpSelMinMax(I) || isBoolLogicOp(I)) {
auto *Sel = cast<SelectInst>(I);
auto *Cmp = dyn_cast<Instruction>(Sel->getCondition());
return Sel->getParent() == BB && Cmp && Cmp->getParent() == BB;
}
return I->getParent() == BB;
}
/// Expected number of uses for reduction operations/reduced values.
static bool hasRequiredNumberOfUses(bool IsCmpSelMinMax, Instruction *I) {
if (IsCmpSelMinMax) {
// SelectInst must be used twice while the condition op must have single
// use only.
if (auto *Sel = dyn_cast<SelectInst>(I))
return Sel->hasNUses(2) && Sel->getCondition()->hasOneUse();
return I->hasNUses(2);
}
// Arithmetic reduction operation must be used once only.
return I->hasOneUse();
}
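// Illustrative use counts (assumed IR): in a cmp+select min/max chain
//   %c0 = icmp sgt i32 %a, %b
//   %s0 = select i1 %c0, i32 %a, i32 %b
//   %c1 = icmp sgt i32 %s0, %d
//   %s1 = select i1 %c1, i32 %s0, i32 %d
// %s0 feeds both %c1 and %s1, hence the required two uses, while each
// condition %cN must be used only by its select.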
/// Initializes the list of reduction operations.
void initReductionOps(Instruction *I) {
if (isCmpSelMinMax(I))
ReductionOps.assign(2, ReductionOpsType());
else
ReductionOps.assign(1, ReductionOpsType());
}
/// Add all reduction operations for the reduction instruction \p I.
void addReductionOps(Instruction *I) {
if (isCmpSelMinMax(I)) {
ReductionOps[0].emplace_back(cast<SelectInst>(I)->getCondition());
ReductionOps[1].emplace_back(I);
} else {
ReductionOps[0].emplace_back(I);
}
}
static bool isGoodForReduction(ArrayRef<Value *> Data) {
int Sz = Data.size();
auto *I = dyn_cast<Instruction>(Data.front());
return Sz > 1 || isConstant(Data.front()) ||
(I && !isa<LoadInst>(I) && isValidForAlternation(I->getOpcode()));
}
public:
HorizontalReduction() = default;
/// Try to find a reduction tree.
bool matchAssociativeReduction(BoUpSLP &R, Instruction *Root,
ScalarEvolution &SE, const DataLayout &DL,
const TargetLibraryInfo &TLI) {
RdxKind = HorizontalReduction::getRdxKind(Root);
if (!isVectorizable(RdxKind, Root))
return false;
// Analyze "regular" integer/FP types for reductions - no target-specific
// types or pointers.
Type *Ty = Root->getType();
if (!isValidElementType(Ty) || Ty->isPointerTy())
return false;
// Though the ultimate reduction may have multiple uses, its condition must
// have only single use.
if (auto *Sel = dyn_cast<SelectInst>(Root))
if (!Sel->getCondition()->hasOneUse())
return false;
ReductionRoot = Root;
// Iterate through all the operands of the possible reduction tree and
// gather all the reduced values, sorting them by their value id.
BasicBlock *BB = Root->getParent();
bool IsCmpSelMinMax = isCmpSelMinMax(Root);
SmallVector<Instruction *> Worklist(1, Root);
// Checks if the operands of the \p TreeN instruction are also reduction
// operations or should be treated as reduced values or an extra argument,
// which is not part of the reduction.
auto CheckOperands = [&](Instruction *TreeN,
SmallVectorImpl<Value *> &ExtraArgs,
SmallVectorImpl<Value *> &PossibleReducedVals,
SmallVectorImpl<Instruction *> &ReductionOps) {
for (int I : reverse(seq<int>(getFirstOperandIndex(TreeN),
getNumberOfOperands(TreeN)))) {
Value *EdgeVal = getRdxOperand(TreeN, I);
ReducedValsToOps[EdgeVal].push_back(TreeN);
auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
// Edge has wrong parent - mark as an extra argument.
if (EdgeInst && !isVectorLikeInstWithConstOps(EdgeInst) &&
!hasSameParent(EdgeInst, BB)) {
ExtraArgs.push_back(EdgeVal);
continue;
}
// If the edge is not an instruction, or it differs from the main reduction
// opcode or has too many uses - treat it as a possible reduced value. Also,
// do not try to reduce constant values if the operation is not foldable.
if (!EdgeInst || getRdxKind(EdgeInst) != RdxKind ||
IsCmpSelMinMax != isCmpSelMinMax(EdgeInst) ||
!hasRequiredNumberOfUses(IsCmpSelMinMax, EdgeInst) ||
!isVectorizable(RdxKind, EdgeInst) ||
(R.isAnalyzedReductionRoot(EdgeInst) &&
all_of(EdgeInst->operands(), IsaPred<Constant>))) {
PossibleReducedVals.push_back(EdgeVal);
continue;
}
ReductionOps.push_back(EdgeInst);
}
};
// Try to regroup the reduced values so that it becomes more profitable to
// reduce them. Values are grouped by their value ids, instructions - by their
// opcode and/or alternate opcode, plus extra analysis is done for loads
// (grouping them by the distance between pointers) and cmp instructions
// (grouping them by the predicate).
MapVector<size_t, MapVector<size_t, MapVector<Value *, unsigned>>>
PossibleReducedVals;
initReductionOps(Root);
DenseMap<Value *, SmallVector<LoadInst *>> LoadsMap;
SmallSet<size_t, 2> LoadKeyUsed;
auto GenerateLoadsSubkey = [&](size_t Key, LoadInst *LI) {
Value *Ptr = getUnderlyingObject(LI->getPointerOperand());
if (LoadKeyUsed.contains(Key)) {
auto LIt = LoadsMap.find(Ptr);
if (LIt != LoadsMap.end()) {
for (LoadInst *RLI : LIt->second) {
if (getPointersDiff(RLI->getType(), RLI->getPointerOperand(),
LI->getType(), LI->getPointerOperand(), DL, SE,
/*StrictCheck=*/true))
return hash_value(RLI->getPointerOperand());
}
for (LoadInst *RLI : LIt->second) {
if (arePointersCompatible(RLI->getPointerOperand(),
LI->getPointerOperand(), TLI)) {
hash_code SubKey = hash_value(RLI->getPointerOperand());
return SubKey;
}
}
if (LIt->second.size() > 2) {
hash_code SubKey =
hash_value(LIt->second.back()->getPointerOperand());
return SubKey;
}
}
}
LoadKeyUsed.insert(Key);
LoadsMap.try_emplace(Ptr).first->second.push_back(LI);
return hash_value(LI->getPointerOperand());
};
while (!Worklist.empty()) {
Instruction *TreeN = Worklist.pop_back_val();
SmallVector<Value *> Args;
SmallVector<Value *> PossibleRedVals;
SmallVector<Instruction *> PossibleReductionOps;
CheckOperands(TreeN, Args, PossibleRedVals, PossibleReductionOps);
// If too many extra args - mark the instruction itself as a reduction
// value, not a reduction operation.
if (Args.size() < 2) {
addReductionOps(TreeN);
// Add extra args.
if (!Args.empty()) {
assert(Args.size() == 1 && "Expected only single argument.");
ExtraArgs[TreeN] = Args.front();
}
// Add reduction values. The values are sorted for better vectorization
// results.
for (Value *V : PossibleRedVals) {
size_t Key, Idx;
std::tie(Key, Idx) = generateKeySubkey(V, &TLI, GenerateLoadsSubkey,
/*AllowAlternate=*/false);
++PossibleReducedVals[Key][Idx]
.insert(std::make_pair(V, 0))
.first->second;
}
Worklist.append(PossibleReductionOps.rbegin(),
PossibleReductionOps.rend());
} else {
size_t Key, Idx;
std::tie(Key, Idx) = generateKeySubkey(TreeN, &TLI, GenerateLoadsSubkey,
/*AllowAlternate=*/false);
++PossibleReducedVals[Key][Idx]
.insert(std::make_pair(TreeN, 0))
.first->second;
}
}
auto PossibleReducedValsVect = PossibleReducedVals.takeVector();
// Sort the values by the total number of value kinds so that the reduction
// starts from the longest possible sequences of reduced values.
for (auto &PossibleReducedVals : PossibleReducedValsVect) {
auto PossibleRedVals = PossibleReducedVals.second.takeVector();
SmallVector<SmallVector<Value *>> PossibleRedValsVect;
for (auto It = PossibleRedVals.begin(), E = PossibleRedVals.end();
It != E; ++It) {
PossibleRedValsVect.emplace_back();
auto RedValsVect = It->second.takeVector();
stable_sort(RedValsVect, llvm::less_second());
for (const std::pair<Value *, unsigned> &Data : RedValsVect)
PossibleRedValsVect.back().append(Data.second, Data.first);
}
stable_sort(PossibleRedValsVect, [](const auto &P1, const auto &P2) {
return P1.size() > P2.size();
});
int NewIdx = -1;
for (ArrayRef<Value *> Data : PossibleRedValsVect) {
if (NewIdx < 0 ||
(!isGoodForReduction(Data) &&
(!isa<LoadInst>(Data.front()) ||
!isa<LoadInst>(ReducedVals[NewIdx].front()) ||
getUnderlyingObject(
cast<LoadInst>(Data.front())->getPointerOperand()) !=
getUnderlyingObject(
cast<LoadInst>(ReducedVals[NewIdx].front())
->getPointerOperand())))) {
NewIdx = ReducedVals.size();
ReducedVals.emplace_back();
}
ReducedVals[NewIdx].append(Data.rbegin(), Data.rend());
}
}
// Sort the reduced values by the number of same/alternate opcodes and/or
// pointer operands.
stable_sort(ReducedVals, [](ArrayRef<Value *> P1, ArrayRef<Value *> P2) {
return P1.size() > P2.size();
});
return true;
}
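// Illustrative match (assumed IR): for the chain
//   %r0 = add i32 %a, %b
//   %r1 = add i32 %r0, %c
//   %r2 = add i32 %r1, %d
// calling matchAssociativeReduction with Root = %r2 sets
// RdxKind = RecurKind::Add, collects %r2, %r1 and %r0 as reduction
// operations, and groups %a, %b, %c and %d into ReducedVals by their value
// kind.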
/// Attempt to vectorize the tree found by matchAssociativeReduction.
Value *tryToReduce(BoUpSLP &V, const DataLayout &DL, TargetTransformInfo *TTI,
const TargetLibraryInfo &TLI) {
constexpr int ReductionLimit = 4;
constexpr unsigned RegMaxNumber = 4;
constexpr unsigned RedValsMaxNumber = 128;
// If there are a sufficient number of reduction values, reduce
// to a nearby power-of-2. We can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.
unsigned NumReducedVals =
std::accumulate(ReducedVals.begin(), ReducedVals.end(), 0,
[](unsigned Num, ArrayRef<Value *> Vals) -> unsigned {
if (!isGoodForReduction(Vals))
return Num;
return Num + Vals.size();
});
if (NumReducedVals < ReductionLimit &&
(!AllowHorRdxIdenityOptimization ||
all_of(ReducedVals, [](ArrayRef<Value *> RedV) {
return RedV.size() < 2 || !allConstant(RedV) || !isSplat(RedV);
}))) {
for (ReductionOpsType &RdxOps : ReductionOps)
for (Value *RdxOp : RdxOps)
V.analyzedReductionRoot(cast<Instruction>(RdxOp));
return nullptr;
}
IRBuilder<TargetFolder> Builder(ReductionRoot->getContext(),
TargetFolder(DL));
Builder.SetInsertPoint(cast<Instruction>(ReductionRoot));
// Track the reduced values in case they are replaced by an extractelement
// due to the vectorization.
DenseMap<Value *, WeakTrackingVH> TrackedVals(
ReducedVals.size() * ReducedVals.front().size() + ExtraArgs.size());
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
ExternallyUsedValues.reserve(ExtraArgs.size() + 1);
// The same extra argument may be used several times, so log each attempt
// to use it.
for (const std::pair<Instruction *, Value *> &Pair : ExtraArgs) {
assert(Pair.first && "DebugLoc must be set.");
ExternallyUsedValues[Pair.second].push_back(Pair.first);
TrackedVals.try_emplace(Pair.second, Pair.second);
}
// The compare instruction of a min/max is the insertion point for new
// instructions and may be replaced with a new compare instruction.
auto &&GetCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
assert(isa<SelectInst>(RdxRootInst) &&
"Expected min/max reduction to have select root instruction");
Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
assert(isa<Instruction>(ScalarCond) &&
"Expected min/max reduction to have compare condition");
return cast<Instruction>(ScalarCond);
};
// Return new VectorizedTree, based on previous value.
auto GetNewVectorizedTree = [&](Value *VectorizedTree, Value *Res) {
if (VectorizedTree) {
// Update the final value in the reduction.
Builder.SetCurrentDebugLocation(
cast<Instruction>(ReductionOps.front().front())->getDebugLoc());
if ((isa<PoisonValue>(VectorizedTree) && !isa<PoisonValue>(Res)) ||
(isGuaranteedNotToBePoison(Res) &&
!isGuaranteedNotToBePoison(VectorizedTree))) {
auto It = ReducedValsToOps.find(Res);
if (It != ReducedValsToOps.end() &&
any_of(It->getSecond(),
[](Instruction *I) { return isBoolLogicOp(I); }))
std::swap(VectorizedTree, Res);
}
return createOp(Builder, RdxKind, VectorizedTree, Res, "op.rdx",
ReductionOps);
}
// Initialize the final value in the reduction.
return Res;
};
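// Remember whether any reduction operation is a boolean logic op (and/or);
// such ops will need freeze instructions later to keep poison from leaking
// across what used to be sequential, safe scalar boolean logic.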
bool AnyBoolLogicOp =
any_of(ReductionOps.back(), [](Value *V) {
return isBoolLogicOp(cast<Instruction>(V));
});
// The reduction root is used as the insertion point for new instructions,
// so set it as externally used to prevent it from being deleted.
ExternallyUsedValues[ReductionRoot];
SmallDenseSet<Value *> IgnoreList(ReductionOps.size() *
ReductionOps.front().size());
for (ReductionOpsType &RdxOps : ReductionOps)
for (Value *RdxOp : RdxOps) {
if (!RdxOp)
continue;
IgnoreList.insert(RdxOp);
}
// Intersect the fast-math-flags from all reduction operations.
FastMathFlags RdxFMF;
RdxFMF.set();
for (Value *U : IgnoreList)
if (auto *FPMO = dyn_cast<FPMathOperator>(U))
RdxFMF &= FPMO->getFastMathFlags();
bool IsCmpSelMinMax = isCmpSelMinMax(cast<Instruction>(ReductionRoot));
// Need to track the reduced values; they may be changed during the
// vectorization of subvectors.
for (ArrayRef<Value *> Candidates : ReducedVals)
for (Value *V : Candidates)
TrackedVals.try_emplace(V, V);
DenseMap<Value *, unsigned> VectorizedVals(ReducedVals.size());
// List of the values that were reduced in other trees as part of gather
// nodes and thus requiring extract if fully vectorized in other trees.
SmallPtrSet<Value *, 4> RequiredExtract;
Value *VectorizedTree = nullptr;
bool CheckForReusedReductionOps = false;
// Try to vectorize elements based on their type.
SmallVector<InstructionsState> States;
for (ArrayRef<Value *> RV : ReducedVals)
States.push_back(getSameOpcode(RV, TLI));
for (unsigned I = 0, E = ReducedVals.size(); I < E; ++I) {
ArrayRef<Value *> OrigReducedVals = ReducedVals[I];
InstructionsState S = States[I];
SmallVector<Value *> Candidates;
Candidates.reserve(2 * OrigReducedVals.size());
DenseMap<Value *, Value *> TrackedToOrig(2 * OrigReducedVals.size());
for (unsigned Cnt = 0, Sz = OrigReducedVals.size(); Cnt < Sz; ++Cnt) {
Value *RdxVal = TrackedVals.find(OrigReducedVals[Cnt])->second;
// Check if the reduction value was overridden by an extractelement
// instruction because of the vectorization and exclude it, if it is not
// compatible with other values.
// Also check if the instruction was folded to a constant/other value.
auto *Inst = dyn_cast<Instruction>(RdxVal);
if ((Inst && isVectorLikeInstWithConstOps(Inst) &&
(!S.getOpcode() || !S.isOpcodeOrAlt(Inst))) ||
(S.getOpcode() && !Inst))
continue;
Candidates.push_back(RdxVal);
TrackedToOrig.try_emplace(RdxVal, OrigReducedVals[Cnt]);
}
bool ShuffledExtracts = false;
// Try to handle shuffled extractelements.
if (S.getOpcode() == Instruction::ExtractElement && !S.isAltShuffle() &&
I + 1 < E) {
InstructionsState NextS = getSameOpcode(ReducedVals[I + 1], TLI);
if (NextS.getOpcode() == Instruction::ExtractElement &&
!NextS.isAltShuffle()) {
SmallVector<Value *> CommonCandidates(Candidates);
for (Value *RV : ReducedVals[I + 1]) {
Value *RdxVal = TrackedVals.find(RV)->second;
// Check if the reduction value was overridden by the
// extractelement instruction because of the vectorization and
// exclude it, if it is not compatible with other values.
if (auto *Inst = dyn_cast<Instruction>(RdxVal))
if (!NextS.getOpcode() || !NextS.isOpcodeOrAlt(Inst))
continue;
CommonCandidates.push_back(RdxVal);
TrackedToOrig.try_emplace(RdxVal, RV);
}
SmallVector<int> Mask;
if (isFixedVectorShuffle(CommonCandidates, Mask)) {
++I;
Candidates.swap(CommonCandidates);
ShuffledExtracts = true;
}
}
}
// Emit code for constant values.
if (AllowHorRdxIdenityOptimization && Candidates.size() > 1 &&
allConstant(Candidates)) {
Value *Res = Candidates.front();
++VectorizedVals.try_emplace(Candidates.front(), 0).first->getSecond();
for (Value *VC : ArrayRef(Candidates).drop_front()) {
Res = createOp(Builder, RdxKind, Res, VC, "const.rdx", ReductionOps);
++VectorizedVals.try_emplace(VC, 0).first->getSecond();
if (auto *ResI = dyn_cast<Instruction>(Res))
V.analyzedReductionRoot(ResI);
}
VectorizedTree = GetNewVectorizedTree(VectorizedTree, Res);
continue;
}
unsigned NumReducedVals = Candidates.size();
if (NumReducedVals < ReductionLimit &&
(NumReducedVals < 2 || !AllowHorRdxIdenityOptimization ||
!isSplat(Candidates)))
continue;
// Check if we support repeated scalar values processing (optimization of
// original scalar identity operations on matched horizontal reductions).
IsSupportedHorRdxIdentityOp =
AllowHorRdxIdenityOptimization && RdxKind != RecurKind::Mul &&
RdxKind != RecurKind::FMul && RdxKind != RecurKind::FMulAdd;
// Gather same values.
MapVector<Value *, unsigned> SameValuesCounter;
if (IsSupportedHorRdxIdentityOp)
for (Value *V : Candidates)
++SameValuesCounter.insert(std::make_pair(V, 0)).first->second;
// Used to check if the reduced values are used the same number of times.
// In this case the compiler may produce better code. E.g. if the reduced
// values are aabbccdd (8 x values), then the first node of the tree will
// have a node for 4 x abcd + shuffle <4 x abcd>, <0, 0, 1, 1, 2, 2, 3, 3>.
// Plus, the final reduction will be performed on <8 x aabbccdd>.
// Instead, the compiler may build the <4 x abcd> tree immediately and
// multiply the reduction result by 2.
// Currently it only handles add/fadd/xor. and/or/min/max do not require
// this analysis; other operations may require an extra profitability
// estimate.
bool SameScaleFactor = false;
bool OptReusedScalars = IsSupportedHorRdxIdentityOp &&
SameValuesCounter.size() != Candidates.size();
if (OptReusedScalars) {
SameScaleFactor =
(RdxKind == RecurKind::Add || RdxKind == RecurKind::FAdd ||
RdxKind == RecurKind::Xor) &&
all_of(drop_begin(SameValuesCounter),
[&SameValuesCounter](const std::pair<Value *, unsigned> &P) {
return P.second == SameValuesCounter.front().second;
});
Candidates.resize(SameValuesCounter.size());
transform(SameValuesCounter, Candidates.begin(),
[](const auto &P) { return P.first; });
NumReducedVals = Candidates.size();
// Have a reduction of the same element.
if (NumReducedVals == 1) {
Value *OrigV = TrackedToOrig.find(Candidates.front())->second;
unsigned Cnt = SameValuesCounter.lookup(OrigV);
Value *RedVal =
emitScaleForReusedOps(Candidates.front(), Builder, Cnt);
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
VectorizedVals.try_emplace(OrigV, Cnt);
continue;
}
}
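// Pick the initial vectorization factor: a power of two bounded by both
// the number of reduction values and the number of elements that fit into
// the target's vector registers.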
unsigned MaxVecRegSize = V.getMaxVecRegSize();
unsigned EltSize = V.getVectorElementSize(Candidates[0]);
unsigned MaxElts =
RegMaxNumber * llvm::bit_floor(MaxVecRegSize / EltSize);
unsigned ReduxWidth = std::min<unsigned>(
llvm::bit_floor(NumReducedVals),
std::clamp<unsigned>(MaxElts, RedValsMaxNumber,
RegMaxNumber * RedValsMaxNumber));
unsigned Start = 0;
unsigned Pos = Start;
// Restarts the vectorization attempt with a lower vector factor.
unsigned PrevReduxWidth = ReduxWidth;
bool CheckForReusedReductionOpsLocal = false;
auto &&AdjustReducedVals = [&Pos, &Start, &ReduxWidth, NumReducedVals,
&CheckForReusedReductionOpsLocal,
&PrevReduxWidth, &V,
&IgnoreList](bool IgnoreVL = false) {
bool IsAnyRedOpGathered = !IgnoreVL && V.isAnyGathered(IgnoreList);
if (!CheckForReusedReductionOpsLocal && PrevReduxWidth == ReduxWidth) {
// Check if any of the reduction ops are gathered. If so, it is worth
// trying again with a smaller number of reduction ops.
CheckForReusedReductionOpsLocal |= IsAnyRedOpGathered;
}
++Pos;
if (Pos < NumReducedVals - ReduxWidth + 1)
return IsAnyRedOpGathered;
Pos = Start;
ReduxWidth /= 2;
return IsAnyRedOpGathered;
};
bool AnyVectorized = false;
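// Sliding-window vectorization attempts: try ReduxWidth values starting at
// Pos; AdjustReducedVals shifts the window right and, once every position
// has failed, halves ReduxWidth.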
while (Pos < NumReducedVals - ReduxWidth + 1 &&
ReduxWidth >= ReductionLimit) {
// Dependency in tree of the reduction ops - drop this attempt, try
// later.
if (CheckForReusedReductionOpsLocal && PrevReduxWidth != ReduxWidth &&
Start == 0) {
CheckForReusedReductionOps = true;
break;
}
PrevReduxWidth = ReduxWidth;
ArrayRef<Value *> VL(std::next(Candidates.begin(), Pos), ReduxWidth);
// Already analyzed - skip.
if (V.areAnalyzedReductionVals(VL)) {
(void)AdjustReducedVals(/*IgnoreVL=*/true);
continue;
}
// Early exit if any of the reduction values were deleted during
// previous vectorization attempts.
if (any_of(VL, [&V](Value *RedVal) {
auto *RedValI = dyn_cast<Instruction>(RedVal);
if (!RedValI)
return false;
return V.isDeleted(RedValI);
}))
break;
V.buildTree(VL, IgnoreList);
if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true)) {
if (!AdjustReducedVals())
V.analyzedReductionVals(VL);
continue;
}
if (V.isLoadCombineReductionCandidate(RdxKind)) {
if (!AdjustReducedVals())
V.analyzedReductionVals(VL);
continue;
}
V.reorderTopToBottom();
// No need to reorder the root node at all.
V.reorderBottomToTop(/*IgnoreReorder=*/true);
// Keep extracted other reduction values, if they are used in the
// vectorization trees.
BoUpSLP::ExtraValueToDebugLocsMap LocalExternallyUsedValues(
ExternallyUsedValues);
for (unsigned Cnt = 0, Sz = ReducedVals.size(); Cnt < Sz; ++Cnt) {
if (Cnt == I || (ShuffledExtracts && Cnt == I - 1))
continue;
for (Value *V : ReducedVals[Cnt])
if (isa<Instruction>(V))
LocalExternallyUsedValues[TrackedVals[V]];
}
if (!IsSupportedHorRdxIdentityOp) {
// Number of uses of the candidates in the vector of values.
assert(SameValuesCounter.empty() &&
"Reused values counter map is not empty");
for (unsigned Cnt = 0; Cnt < NumReducedVals; ++Cnt) {
if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
continue;
Value *V = Candidates[Cnt];
Value *OrigV = TrackedToOrig.find(V)->second;
++SameValuesCounter[OrigV];
}
}
SmallPtrSet<Value *, 4> VLScalars(VL.begin(), VL.end());
// Gather externally used values.
SmallPtrSet<Value *, 4> Visited;
for (unsigned Cnt = 0; Cnt < NumReducedVals; ++Cnt) {
if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
continue;
Value *RdxVal = Candidates[Cnt];
if (!Visited.insert(RdxVal).second)
continue;
// Check if the scalar was vectorized as part of the vectorization
// tree but not the top node.
if (!VLScalars.contains(RdxVal) && V.isVectorized(RdxVal)) {
LocalExternallyUsedValues[RdxVal];
continue;
}
Value *OrigV = TrackedToOrig.find(RdxVal)->second;
unsigned NumOps =
VectorizedVals.lookup(RdxVal) + SameValuesCounter[OrigV];
if (NumOps != ReducedValsToOps.find(OrigV)->second.size())
LocalExternallyUsedValues[RdxVal];
}
// Do not need the list of reused scalars in regular mode anymore.
if (!IsSupportedHorRdxIdentityOp)
SameValuesCounter.clear();
for (Value *RdxVal : VL)
if (RequiredExtract.contains(RdxVal))
LocalExternallyUsedValues[RdxVal];
// Update LocalExternallyUsedValues for scalars that were replaced by
// extractelement instructions.
DenseMap<Value *, Value *> ReplacementToExternal;
for (const std::pair<Value *, Value *> &Pair : ReplacedExternals)
ReplacementToExternal.try_emplace(Pair.second, Pair.first);
for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) {
Value *Ext = Pair.first;
auto RIt = ReplacementToExternal.find(Ext);
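// Follow the chain of replacements back to the original externally used
// value.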
while (RIt != ReplacementToExternal.end()) {
Ext = RIt->second;
RIt = ReplacementToExternal.find(Ext);
}
auto *It = ExternallyUsedValues.find(Ext);
if (It == ExternallyUsedValues.end())
continue;
LocalExternallyUsedValues[Pair.second].append(It->second);
}
V.buildExternalUses(LocalExternallyUsedValues);
V.computeMinimumValueSizes();
V.transformNodes();
// Estimate cost.
InstructionCost TreeCost = V.getTreeCost(VL);
InstructionCost ReductionCost =
getReductionCost(TTI, VL, IsCmpSelMinMax, ReduxWidth, RdxFMF);
InstructionCost Cost = TreeCost + ReductionCost;
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost
<< " for reduction\n");
if (!Cost.isValid())
break;
if (Cost >= -SLPCostThreshold) {
V.getORE()->emit([&]() {
return OptimizationRemarkMissed(
SV_NAME, "HorSLPNotBeneficial",
ReducedValsToOps.find(VL[0])->second.front())
<< "Vectorizing horizontal reduction is possible "
<< "but not beneficial with cost " << ore::NV("Cost", Cost)
<< " and threshold "
<< ore::NV("Threshold", -SLPCostThreshold);
});
if (!AdjustReducedVals())
V.analyzedReductionVals(VL);
continue;
}
LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
<< Cost << ". (HorRdx)\n");
V.getORE()->emit([&]() {
return OptimizationRemark(
SV_NAME, "VectorizedHorizontalReduction",
ReducedValsToOps.find(VL[0])->second.front())
<< "Vectorized horizontal reduction with cost "
<< ore::NV("Cost", Cost) << " and with tree size "
<< ore::NV("TreeSize", V.getTreeSize());
});
Builder.setFastMathFlags(RdxFMF);
// Emit a reduction. If the root is a select (min/max idiom), the insert
// point is the compare condition of that select.
Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
Instruction *InsertPt = RdxRootInst;
if (IsCmpSelMinMax)
InsertPt = GetCmpForMinMaxReduction(RdxRootInst);
// Vectorize a tree.
Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues,
ReplacedExternals, InsertPt);
Builder.SetInsertPoint(InsertPt);
// To prevent poison from leaking across what used to be sequential,
// safe, scalar boolean logic operations, the reduction operand must be
// frozen.
if ((isBoolLogicOp(RdxRootInst) ||
(AnyBoolLogicOp && VL.size() != TrackedVals.size())) &&
!isGuaranteedNotToBePoison(VectorizedRoot))
VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
// Emit code to correctly handle reused reduced values, if required.
if (OptReusedScalars && !SameScaleFactor) {
VectorizedRoot = emitReusedOps(VectorizedRoot, Builder, V,
SameValuesCounter, TrackedToOrig);
}
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
if (ReducedSubTree->getType() != VL.front()->getType()) {
assert(ReducedSubTree->getType() != VL.front()->getType() &&
"Expected different reduction type.");
ReducedSubTree =
Builder.CreateIntCast(ReducedSubTree, VL.front()->getType(),
V.isSignedMinBitwidthRootNode());
}
// Improved analysis for add/fadd/xor reductions with the same scale factor
// for all operands of the reduction. We can emit scalar ops for them
// instead.
if (OptReusedScalars && SameScaleFactor)
ReducedSubTree = emitScaleForReusedOps(
ReducedSubTree, Builder, SameValuesCounter.front().second);
VectorizedTree = GetNewVectorizedTree(VectorizedTree, ReducedSubTree);
// Count vectorized reduced values to exclude them from final reduction.
for (Value *RdxVal : VL) {
Value *OrigV = TrackedToOrig.find(RdxVal)->second;
if (IsSupportedHorRdxIdentityOp) {
VectorizedVals.try_emplace(OrigV, SameValuesCounter[RdxVal]);
continue;
}
++VectorizedVals.try_emplace(OrigV, 0).first->getSecond();
if (!V.isVectorized(RdxVal))
RequiredExtract.insert(RdxVal);
}
Pos += ReduxWidth;
Start = Pos;
ReduxWidth = llvm::bit_floor(NumReducedVals - Pos);
AnyVectorized = true;
}
if (OptReusedScalars && !AnyVectorized) {
for (const std::pair<Value *, unsigned> &P : SameValuesCounter) {
Value *RedVal = emitScaleForReusedOps(P.first, Builder, P.second);
VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
Value *OrigV = TrackedToOrig.find(P.first)->second;
VectorizedVals.try_emplace(OrigV, P.second);
}
continue;
}
}
if (VectorizedTree) {
// Reorder operands of bool logical op in the natural order to avoid
// possible problem with poison propagation. If not possible to reorder
// (both operands are originally RHS), emit an extra freeze instruction
// for the LHS operand.
// I.e., if we have original code like this:
// RedOp1 = select i1 ?, i1 LHS, i1 false
// RedOp2 = select i1 RHS, i1 ?, i1 false
// Then, we swap LHS/RHS to create a new op that matches the poison
// semantics of the original code.
// If we have original code like this and both values could be poison:
// RedOp1 = select i1 ?, i1 LHS, i1 false
// RedOp2 = select i1 ?, i1 RHS, i1 false
// Then, we must freeze LHS in the new op.
auto FixBoolLogicalOps = [&, VectorizedTree](Value *&LHS, Value *&RHS,
Instruction *RedOp1,
Instruction *RedOp2,
bool InitStep) {
if (!AnyBoolLogicOp)
return;
if (isBoolLogicOp(RedOp1) &&
((!InitStep && LHS == VectorizedTree) ||
getRdxOperand(RedOp1, 0) == LHS || isGuaranteedNotToBePoison(LHS)))
return;
if (isBoolLogicOp(RedOp2) && ((!InitStep && RHS == VectorizedTree) ||
getRdxOperand(RedOp2, 0) == RHS ||
isGuaranteedNotToBePoison(RHS))) {
std::swap(LHS, RHS);
return;
}
if (LHS != VectorizedTree)
LHS = Builder.CreateFreeze(LHS);
};
// Finish the reduction.
// Need to add the extra arguments and the possible reduction values that
// were not vectorized.
// Try to avoid dependencies between the scalar remainders after
// reductions.
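// FinalGen combines the remaining scalars pairwise, halving the list on
// each pass, so the remainder forms a balanced tree of ops rather than a
// serial chain.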
auto FinalGen =
[&](ArrayRef<std::pair<Instruction *, Value *>> InstVals,
bool InitStep) {
unsigned Sz = InstVals.size();
SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 +
Sz % 2);
for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) {
Instruction *RedOp = InstVals[I + 1].first;
Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
Value *RdxVal1 = InstVals[I].second;
Value *StableRdxVal1 = RdxVal1;
auto It1 = TrackedVals.find(RdxVal1);
if (It1 != TrackedVals.end())
StableRdxVal1 = It1->second;
Value *RdxVal2 = InstVals[I + 1].second;
Value *StableRdxVal2 = RdxVal2;
auto It2 = TrackedVals.find(RdxVal2);
if (It2 != TrackedVals.end())
StableRdxVal2 = It2->second;
// To prevent poison from leaking across what used to be
// sequential, safe, scalar boolean logic operations, the
// reduction operand must be frozen.
FixBoolLogicalOps(StableRdxVal1, StableRdxVal2, InstVals[I].first,
RedOp, InitStep);
Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
StableRdxVal2, "op.rdx", ReductionOps);
ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
}
if (Sz % 2 == 1)
ExtraReds[Sz / 2] = InstVals.back();
return ExtraReds;
};
SmallVector<std::pair<Instruction *, Value *>> ExtraReductions;
ExtraReductions.emplace_back(cast<Instruction>(ReductionRoot),
VectorizedTree);
SmallPtrSet<Value *, 8> Visited;
for (ArrayRef<Value *> Candidates : ReducedVals) {
for (Value *RdxVal : Candidates) {
if (!Visited.insert(RdxVal).second)
continue;
unsigned NumOps = VectorizedVals.lookup(RdxVal);
for (Instruction *RedOp :
ArrayRef(ReducedValsToOps.find(RdxVal)->second)
.drop_back(NumOps))
ExtraReductions.emplace_back(RedOp, RdxVal);
}
}
for (auto &Pair : ExternallyUsedValues) {
// Add each externally used value to the final reduction.
for (auto *I : Pair.second)
ExtraReductions.emplace_back(I, Pair.first);
}
// Iterate through all non-vectorized reduction values/extra arguments.
bool InitStep = true;
while (ExtraReductions.size() > 1) {
SmallVector<std::pair<Instruction *, Value *>> NewReds =
FinalGen(ExtraReductions, InitStep);
ExtraReductions.swap(NewReds);
InitStep = false;
}
VectorizedTree = ExtraReductions.front().second;
ReductionRoot->replaceAllUsesWith(VectorizedTree);
// The original scalar reduction is expected to have no remaining
// uses outside the reduction tree itself. Assert that we got this
// correct, replace internal uses with poison, and mark for eventual
// deletion.
#ifndef NDEBUG
SmallSet<Value *, 4> IgnoreSet;
for (ArrayRef<Value *> RdxOps : ReductionOps)
IgnoreSet.insert(RdxOps.begin(), RdxOps.end());
#endif
for (ArrayRef<Value *> RdxOps : ReductionOps) {
for (Value *Ignore : RdxOps) {
if (!Ignore)
continue;
#ifndef NDEBUG
for (auto *U : Ignore->users()) {
assert(IgnoreSet.count(U) &&
"All users must be either in the reduction ops list.");
}
#endif
if (!Ignore->use_empty()) {
Value *P = PoisonValue::get(Ignore->getType());
Ignore->replaceAllUsesWith(P);
}
}
V.removeInstructionsAndOperands(RdxOps);
}
} else if (!CheckForReusedReductionOps) {
for (ReductionOpsType &RdxOps : ReductionOps)
for (Value *RdxOp : RdxOps)
V.analyzedReductionRoot(cast<Instruction>(RdxOp));
}
return VectorizedTree;
}
private:
/// Calculate the cost of a reduction.
InstructionCost getReductionCost(TargetTransformInfo *TTI,
ArrayRef<Value *> ReducedVals,
bool IsCmpSelMinMax, unsigned ReduxWidth,
FastMathFlags FMF) {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Type *ScalarTy = ReducedVals.front()->getType();
FixedVectorType *VectorTy = getWidenedType(ScalarTy, ReduxWidth);
InstructionCost VectorCost = 0, ScalarCost;
// If all of the reduced values are constant, the vector cost is 0, since
// the reduction value can be calculated at compile time.
bool AllConsts = allConstant(ReducedVals);
auto EvaluateScalarCost = [&](function_ref<InstructionCost()> GenCostFn) {
InstructionCost Cost = 0;
// Scalar cost is repeated for N-1 elements.
int Cnt = ReducedVals.size();
for (Value *RdxVal : ReducedVals) {
if (Cnt == 1)
break;
--Cnt;
if (RdxVal->hasNUsesOrMore(IsCmpSelMinMax ? 3 : 2)) {
Cost += GenCostFn();
continue;
}
InstructionCost ScalarCost = 0;
for (User *U : RdxVal->users()) {
auto *RdxOp = cast<Instruction>(U);
if (hasRequiredNumberOfUses(IsCmpSelMinMax, RdxOp)) {
ScalarCost += TTI->getInstructionCost(RdxOp, CostKind);
continue;
}
ScalarCost = InstructionCost::getInvalid();
break;
}
if (ScalarCost.isValid())
Cost += ScalarCost;
else
Cost += GenCostFn();
}
return Cost;
};
switch (RdxKind) {
case RecurKind::Add:
case RecurKind::Mul:
case RecurKind::Or:
case RecurKind::And:
case RecurKind::Xor:
case RecurKind::FAdd:
case RecurKind::FMul: {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RdxKind);
if (!AllConsts)
VectorCost =
TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF, CostKind);
ScalarCost = EvaluateScalarCost([&]() {
return TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy, CostKind);
});
break;
}
case RecurKind::FMax:
case RecurKind::FMin:
case RecurKind::FMaximum:
case RecurKind::FMinimum:
case RecurKind::SMax:
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin: {
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
if (!AllConsts)
VectorCost = TTI->getMinMaxReductionCost(Id, VectorTy, FMF, CostKind);
ScalarCost = EvaluateScalarCost([&]() {
IntrinsicCostAttributes ICA(Id, ScalarTy, {ScalarTy, ScalarTy}, FMF);
return TTI->getIntrinsicInstrCost(ICA, CostKind);
});
break;
}
default:
llvm_unreachable("Expected arithmetic or min/max reduction operation");
}
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VectorCost - ScalarCost
<< " for reduction of " << shortBundleName(ReducedVals)
<< " (It is a splitting reduction)\n");
return VectorCost - ScalarCost;
}
/// Emit a horizontal reduction of the vectorized value.
Value *emitReduction(Value *VectorizedValue, IRBuilderBase &Builder,
unsigned ReduxWidth, const TargetTransformInfo *TTI) {
assert(VectorizedValue && "Need to have a vectorized tree node");
assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
assert(RdxKind != RecurKind::FMulAdd &&
"A call to the llvm.fmuladd intrinsic is not handled yet");
++NumVectorInstructions;
return createSimpleTargetReduction(Builder, VectorizedValue, RdxKind);
}
/// Emits optimized code for a unique scalar value reused \p Cnt times.
Value *emitScaleForReusedOps(Value *VectorizedValue, IRBuilderBase &Builder,
unsigned Cnt) {
assert(IsSupportedHorRdxIdentityOp &&
"The optimization of matched scalar identity horizontal reductions "
"must be supported.");
switch (RdxKind) {
case RecurKind::Add: {
// res = mul vv, n
Value *Scale = ConstantInt::get(VectorizedValue->getType(), Cnt);
LLVM_DEBUG(dbgs() << "SLP: Add (to-mul) " << Cnt << "of "
<< VectorizedValue << ". (HorRdx)\n");
return Builder.CreateMul(VectorizedValue, Scale);
}
case RecurKind::Xor: {
// res = n % 2 ? 0 : vv
LLVM_DEBUG(dbgs() << "SLP: Xor " << Cnt << "of " << VectorizedValue
<< ". (HorRdx)\n");
if (Cnt % 2 == 0)
return Constant::getNullValue(VectorizedValue->getType());
return VectorizedValue;
}
case RecurKind::FAdd: {
// res = fmul v, n
Value *Scale = ConstantFP::get(VectorizedValue->getType(), Cnt);
LLVM_DEBUG(dbgs() << "SLP: FAdd (to-fmul) " << Cnt << "of "
<< VectorizedValue << ". (HorRdx)\n");
return Builder.CreateFMul(VectorizedValue, Scale);
}
case RecurKind::And:
case RecurKind::Or:
case RecurKind::SMax:
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
case RecurKind::FMax:
case RecurKind::FMin:
case RecurKind::FMaximum:
case RecurKind::FMinimum:
// res = vv
return VectorizedValue;
case RecurKind::Mul:
case RecurKind::FMul:
case RecurKind::FMulAdd:
case RecurKind::IAnyOf:
case RecurKind::FAnyOf:
case RecurKind::None:
llvm_unreachable("Unexpected reduction kind for repeated scalar.");
}
return nullptr;
}
/// Emits the actual operation for the scalar identity values, found during
/// horizontal reduction analysis.
Value *emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder,
BoUpSLP &R,
const MapVector<Value *, unsigned> &SameValuesCounter,
const DenseMap<Value *, Value *> &TrackedToOrig) {
assert(IsSupportedHorRdxIdentityOp &&
"The optimization of matched scalar identity horizontal reductions "
"must be supported.");
ArrayRef<Value *> VL = R.getRootNodeScalars();
auto *VTy = cast<FixedVectorType>(VectorizedValue->getType());
if (VTy->getElementType() != VL.front()->getType()) {
VectorizedValue = Builder.CreateIntCast(
VectorizedValue,
getWidenedType(VL.front()->getType(), VTy->getNumElements()),
R.isSignedMinBitwidthRootNode());
}
switch (RdxKind) {
case RecurKind::Add: {
// root = mul prev_root, <1, 1, n, 1>
SmallVector<Constant *> Vals;
for (Value *V : VL) {
unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find(V)->second);
Vals.push_back(ConstantInt::get(V->getType(), Cnt, /*IsSigned=*/false));
}
auto *Scale = ConstantVector::get(Vals);
LLVM_DEBUG(dbgs() << "SLP: Add (to-mul) " << Scale << "of "
<< VectorizedValue << ". (HorRdx)\n");
return Builder.CreateMul(VectorizedValue, Scale);
}
case RecurKind::And:
case RecurKind::Or:
// No need for multiple or/and(s).
LLVM_DEBUG(dbgs() << "SLP: And/or of same " << VectorizedValue
<< ". (HorRdx)\n");
return VectorizedValue;
case RecurKind::SMax:
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
case RecurKind::FMax:
case RecurKind::FMin:
case RecurKind::FMaximum:
case RecurKind::FMinimum:
// No need for multiple min/max(s) of the same value.
LLVM_DEBUG(dbgs() << "SLP: Max/min of same " << VectorizedValue
<< ". (HorRdx)\n");
return VectorizedValue;
case RecurKind::Xor: {
// Replace values with an even number of repeats with 0, since
// x xor x = 0.
// root = shuffle prev_root, zeroinitializer, <0, 1, 2, vf, 4, vf, 5, 6,
// 7>, if the 4th and 6th elements have an even number of repeats.
SmallVector<int> Mask(
cast<FixedVectorType>(VectorizedValue->getType())->getNumElements(),
PoisonMaskElem);
std::iota(Mask.begin(), Mask.end(), 0);
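// Mask starts as the identity shuffle; lanes whose value repeats an even
// number of times are redirected to the zero vector below.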
bool NeedShuffle = false;
for (unsigned I = 0, VF = VL.size(); I < VF; ++I) {
Value *V = VL[I];
unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find(V)->second);
if (Cnt % 2 == 0) {
Mask[I] = VF;
NeedShuffle = true;
}
}
LLVM_DEBUG(dbgs() << "SLP: Xor <"; for (int I
: Mask) dbgs()
<< I << " ";
dbgs() << "> of " << VectorizedValue << ". (HorRdx)\n");
if (NeedShuffle)
VectorizedValue = Builder.CreateShuffleVector(
VectorizedValue,
ConstantVector::getNullValue(VectorizedValue->getType()), Mask);
return VectorizedValue;
}
case RecurKind::FAdd: {
// root = fmul prev_root, <1.0, 1.0, n.0, 1.0>
SmallVector<Constant *> Vals;
for (Value *V : VL) {
unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find(V)->second);
Vals.push_back(ConstantFP::get(V->getType(), Cnt));
}
auto *Scale = ConstantVector::get(Vals);
return Builder.CreateFMul(VectorizedValue, Scale);
}
case RecurKind::Mul:
case RecurKind::FMul:
case RecurKind::FMulAdd:
case RecurKind::IAnyOf:
case RecurKind::FAnyOf:
case RecurKind::None:
llvm_unreachable("Unexpected reduction kind for reused scalars.");
}
return nullptr;
}
};
} // end anonymous namespace
/// Gets recurrence kind from the specified value.
static RecurKind getRdxKind(Value *V) {
return HorizontalReduction::getRdxKind(V);
}
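/// \returns the total number of scalar elements in the aggregate (or
/// vector) built by \p InsertInst, e.g. 4 for {<2 x float>, <2 x float>},
/// or std::nullopt if a struct type is not homogeneous.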
static std::optional<unsigned> getAggregateSize(Instruction *InsertInst) {
if (auto *IE = dyn_cast<InsertElementInst>(InsertInst))
return cast<FixedVectorType>(IE->getType())->getNumElements();
unsigned AggregateSize = 1;
auto *IV = cast<InsertValueInst>(InsertInst);
Type *CurrentType = IV->getType();
do {
if (auto *ST = dyn_cast<StructType>(CurrentType)) {
for (auto *Elt : ST->elements())
if (Elt != ST->getElementType(0)) // check homogeneity
return std::nullopt;
AggregateSize *= ST->getNumElements();
CurrentType = ST->getElementType(0);
} else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
AggregateSize *= AT->getNumElements();
CurrentType = AT->getElementType();
} else if (auto *VT = dyn_cast<FixedVectorType>(CurrentType)) {
AggregateSize *= VT->getNumElements();
return AggregateSize;
} else if (CurrentType->isSingleValueType()) {
return AggregateSize;
} else {
return std::nullopt;
}
} while (true);
}
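/// Walks a chain of insertelement/insertvalue instructions bottom-up,
/// recording each inserted scalar in \p BuildVectorOpds and the
/// corresponding insert instruction in \p InsertElts at the flattened
/// aggregate index it targets.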
static void findBuildAggregate_rec(Instruction *LastInsertInst,
TargetTransformInfo *TTI,
SmallVectorImpl<Value *> &BuildVectorOpds,
SmallVectorImpl<Value *> &InsertElts,
unsigned OperandOffset) {
do {
Value *InsertedOperand = LastInsertInst->getOperand(1);
std::optional<unsigned> OperandIndex =
getElementIndex(LastInsertInst, OperandOffset);
if (!OperandIndex)
return;
if (isa<InsertElementInst, InsertValueInst>(InsertedOperand)) {
findBuildAggregate_rec(cast<Instruction>(InsertedOperand), TTI,
BuildVectorOpds, InsertElts, *OperandIndex);
} else {
BuildVectorOpds[*OperandIndex] = InsertedOperand;
InsertElts[*OperandIndex] = LastInsertInst;
}
LastInsertInst = dyn_cast<Instruction>(LastInsertInst->getOperand(0));
} while (LastInsertInst != nullptr &&
isa<InsertValueInst, InsertElementInst>(LastInsertInst) &&
LastInsertInst->hasOneUse());
}
/// Recognize construction of vectors like
/// %ra = insertelement <4 x float> poison, float %s0, i32 0
/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
/// starting from the last insertelement or insertvalue instruction.
///
/// Also recognize homogeneous aggregates like {<2 x float>, <2 x float>},
/// {{float, float}, {float, float}}, [2 x {float, float}] and so on.
/// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
///
/// Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
///
/// \return true if it matches.
static bool findBuildAggregate(Instruction *LastInsertInst,
TargetTransformInfo *TTI,
SmallVectorImpl<Value *> &BuildVectorOpds,
SmallVectorImpl<Value *> &InsertElts) {
assert((isa<InsertElementInst>(LastInsertInst) ||
isa<InsertValueInst>(LastInsertInst)) &&
"Expected insertelement or insertvalue instruction!");
assert((BuildVectorOpds.empty() && InsertElts.empty()) &&
"Expected empty result vectors!");
std::optional<unsigned> AggregateSize = getAggregateSize(LastInsertInst);
if (!AggregateSize)
return false;
BuildVectorOpds.resize(*AggregateSize);
InsertElts.resize(*AggregateSize);
findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts, 0);
llvm::erase(BuildVectorOpds, nullptr);
llvm::erase(InsertElts, nullptr);
return BuildVectorOpds.size() >= 2;
}
/// Try and get a reduction instruction from a phi node.
///
/// Given a phi node \p P in a block \p ParentBB, consider possible reductions
/// if they come from either \p ParentBB or a containing loop latch.
///
/// \returns A candidate reduction value if possible, or \code nullptr \endcode
/// if not possible.
static Instruction *getReductionInstr(const DominatorTree *DT, PHINode *P,
BasicBlock *ParentBB, LoopInfo *LI) {
// There are situations where the reduction value is not dominated by the
// reduction phi. Vectorizing such cases has been reported to cause
// miscompiles. See PR25787.
auto DominatedReduxValue = [&](Value *R) {
return isa<Instruction>(R) &&
DT->dominates(P->getParent(), cast<Instruction>(R)->getParent());
};
Instruction *Rdx = nullptr;
// Return the incoming value if it comes from the same BB as the phi node.
if (P->getIncomingBlock(0) == ParentBB) {
Rdx = dyn_cast<Instruction>(P->getIncomingValue(0));
} else if (P->getIncomingBlock(1) == ParentBB) {
Rdx = dyn_cast<Instruction>(P->getIncomingValue(1));
}
if (Rdx && DominatedReduxValue(Rdx))
return Rdx;
// Otherwise, check whether we have a loop latch to look at.
Loop *BBL = LI->getLoopFor(ParentBB);
if (!BBL)
return nullptr;
BasicBlock *BBLatch = BBL->getLoopLatch();
if (!BBLatch)
return nullptr;
// There is a loop latch, return the incoming value if it comes from
// that. This reduction pattern occasionally turns up.
if (P->getIncomingBlock(0) == BBLatch) {
Rdx = dyn_cast<Instruction>(P->getIncomingValue(0));
} else if (P->getIncomingBlock(1) == BBLatch) {
Rdx = dyn_cast<Instruction>(P->getIncomingValue(1));
}
if (Rdx && DominatedReduxValue(Rdx))
return Rdx;
return nullptr;
}
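/// Matches a binary operator or a min/max intrinsic call, capturing its two
/// operands in \p V0 and \p V1.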
static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
if (match(I, m_BinOp(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::smax>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::smin>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::umax>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::umin>(m_Value(V0), m_Value(V1))))
return true;
return false;
}
/// We could have an initial reduction that is not an add.
/// r *= v1 + v2 + v3 + v4
/// In such a case start looking for a tree rooted in the first '+'.
/// \returns the new root if found, which may be nullptr if not an instruction.
static Instruction *tryGetSecondaryReductionRoot(PHINode *Phi,
Instruction *Root) {
assert((isa<BinaryOperator>(Root) || isa<SelectInst>(Root) ||
isa<IntrinsicInst>(Root)) &&
"Expected binop, select, or intrinsic for reduction matching");
Value *LHS =
Root->getOperand(HorizontalReduction::getFirstOperandIndex(Root));
Value *RHS =
Root->getOperand(HorizontalReduction::getFirstOperandIndex(Root) + 1);
if (LHS == Phi)
return dyn_cast<Instruction>(RHS);
if (RHS == Phi)
return dyn_cast<Instruction>(LHS);
return nullptr;
}
/// \returns the first operand of \p I that does not match \p Phi. If the
/// operand is not an instruction, it returns nullptr.
static Instruction *getNonPhiOperand(Instruction *I, PHINode *Phi) {
Value *Op0 = nullptr;
Value *Op1 = nullptr;
if (!matchRdxBop(I, Op0, Op1))
return nullptr;
return dyn_cast<Instruction>(Op0 == Phi ? Op1 : Op0);
}
/// \returns true if \p I is a candidate instruction for reduction vectorization.
static bool isReductionCandidate(Instruction *I) {
bool IsSelect = match(I, m_Select(m_Value(), m_Value(), m_Value()));
Value *B0 = nullptr, *B1 = nullptr;
bool IsBinop = matchRdxBop(I, B0, B1);
return IsBinop || IsSelect;
}
bool SLPVectorizerPass::vectorizeHorReduction(
PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI,
SmallVectorImpl<WeakTrackingVH> &PostponedInsts) {
if (!ShouldVectorizeHor)
return false;
bool TryOperandsAsNewSeeds = P && isa<BinaryOperator>(Root);
if (Root->getParent() != BB || isa<PHINode>(Root))
return false;
// If we can find a secondary reduction root, use that instead.
auto SelectRoot = [&]() {
if (TryOperandsAsNewSeeds && isReductionCandidate(Root) &&
HorizontalReduction::getRdxKind(Root) != RecurKind::None)
if (Instruction *NewRoot = tryGetSecondaryReductionRoot(P, Root))
return NewRoot;
return Root;
};
// Start the analysis from the Root instruction. If a horizontal reduction
// is found, try to vectorize it. If it is not a horizontal reduction, or
// vectorization is not possible or not effective, and the currently
// analyzed instruction is a binary operation, try to vectorize its
// operands, visiting them in breadth-first order. If the operands were not
// vectorized, repeat the same procedure considering each operand as a
// possible root of a horizontal reduction.
// Interrupt the process if the Root instruction itself was vectorized or
// all sub-trees no deeper than RecursionMaxDepth were analyzed/vectorized.
// If a horizontal reduction was not matched or vectorized, we collect
// instructions for possible later vectorization attempts.
std::queue<std::pair<Instruction *, unsigned>> Stack;
Stack.emplace(SelectRoot(), 0);
SmallPtrSet<Value *, 8> VisitedInstrs;
bool Res = false;
auto &&TryToReduce = [this, TTI, &R](Instruction *Inst) -> Value * {
if (R.isAnalyzedReductionRoot(Inst))
return nullptr;
if (!isReductionCandidate(Inst))
return nullptr;
HorizontalReduction HorRdx;
if (!HorRdx.matchAssociativeReduction(R, Inst, *SE, *DL, *TLI))
return nullptr;
return HorRdx.tryToReduce(R, *DL, TTI, *TLI);
};
auto TryAppendToPostponedInsts = [&](Instruction *FutureSeed) {
if (TryOperandsAsNewSeeds && FutureSeed == Root) {
FutureSeed = getNonPhiOperand(Root, P);
if (!FutureSeed)
return false;
}
// Do not collect CmpInst or InsertElementInst/InsertValueInst as their
// analysis is done separately.
if (!isa<CmpInst, InsertElementInst, InsertValueInst>(FutureSeed))
PostponedInsts.push_back(FutureSeed);
return true;
};
while (!Stack.empty()) {
Instruction *Inst;
unsigned Level;
std::tie(Inst, Level) = Stack.front();
Stack.pop();
// Do not try to analyze an instruction that has already been vectorized.
// This may happen when we vectorize instruction operands on a previous
// iteration while the stack was populated before that happened.
if (R.isDeleted(Inst))
continue;
if (Value *VectorizedV = TryToReduce(Inst)) {
Res = true;
if (auto *I = dyn_cast<Instruction>(VectorizedV)) {
// Try to find another reduction.
Stack.emplace(I, Level);
continue;
}
if (R.isDeleted(Inst))
continue;
} else {
// We could not vectorize `Inst` so try to use it as a future seed.
if (!TryAppendToPostponedInsts(Inst)) {
assert(Stack.empty() && "Expected empty stack");
break;
}
}
// Try to vectorize operands.
// Continue analysis for the instruction from the same basic block only to
// save compile time.
if (++Level < RecursionMaxDepth)
for (auto *Op : Inst->operand_values())
if (VisitedInstrs.insert(Op).second)
if (auto *I = dyn_cast<Instruction>(Op))
// Do not try to vectorize CmpInst operands, this is done
// separately.
if (!isa<PHINode, CmpInst, InsertElementInst, InsertValueInst>(I) &&
!R.isDeleted(I) && I->getParent() == BB)
Stack.emplace(I, Level);
}
return Res;
}
bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Instruction *Root,
BasicBlock *BB, BoUpSLP &R,
TargetTransformInfo *TTI) {
SmallVector<WeakTrackingVH> PostponedInsts;
bool Res = vectorizeHorReduction(P, Root, BB, R, TTI, PostponedInsts);
Res |= tryToVectorize(PostponedInsts, R);
return Res;
}
bool SLPVectorizerPass::tryToVectorize(ArrayRef<WeakTrackingVH> Insts,
BoUpSLP &R) {
bool Res = false;
for (Value *V : Insts)
if (auto *Inst = dyn_cast<Instruction>(V); Inst && !R.isDeleted(Inst))
Res |= tryToVectorize(Inst, R);
return Res;
}
bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
BasicBlock *BB, BoUpSLP &R,
bool MaxVFOnly) {
if (!R.canMapToVector(IVI->getType()))
return false;
SmallVector<Value *, 16> BuildVectorOpds;
SmallVector<Value *, 16> BuildVectorInsts;
if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts))
return false;
if (MaxVFOnly && BuildVectorOpds.size() == 2) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "NotPossible", IVI)
<< "Cannot SLP vectorize list: only 2 elements of buildvalue, "
"trying reduction first.";
});
return false;
}
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
// The aggregate value is unlikely to be processed in a vector register.
return tryToVectorizeList(BuildVectorOpds, R, MaxVFOnly);
}
bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
BasicBlock *BB, BoUpSLP &R,
bool MaxVFOnly) {
SmallVector<Value *, 16> BuildVectorInsts;
SmallVector<Value *, 16> BuildVectorOpds;
SmallVector<int> Mask;
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
(llvm::all_of(BuildVectorOpds, IsaPred<ExtractElementInst, UndefValue>) &&
isFixedVectorShuffle(BuildVectorOpds, Mask)))
return false;
if (MaxVFOnly && BuildVectorInsts.size() == 2) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "NotPossible", IEI)
<< "Cannot SLP vectorize list: only 2 elements of buildvector, "
"trying reduction first.";
});
return false;
}
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IEI << "\n");
return tryToVectorizeList(BuildVectorInsts, R, MaxVFOnly);
}
template <typename T>
static bool tryToVectorizeSequence(
SmallVectorImpl<T *> &Incoming, function_ref<bool(T *, T *)> Comparator,
function_ref<bool(T *, T *)> AreCompatible,
function_ref<bool(ArrayRef<T *>, bool)> TryToVectorizeHelper,
bool MaxVFOnly, BoUpSLP &R) {
bool Changed = false;
// Sort by type, parent, operands.
stable_sort(Incoming, Comparator);
// Try to vectorize elements based on their type.
SmallVector<T *> Candidates;
SmallVector<T *> VL;
for (auto *IncIt = Incoming.begin(), *E = Incoming.end(); IncIt != E;
VL.clear()) {
// Look for the next elements with the same type, parent and operand
// kinds.
auto *I = dyn_cast<Instruction>(*IncIt);
if (!I || R.isDeleted(I)) {
++IncIt;
continue;
}
auto *SameTypeIt = IncIt;
while (SameTypeIt != E && (!isa<Instruction>(*SameTypeIt) ||
R.isDeleted(cast<Instruction>(*SameTypeIt)) ||
AreCompatible(*SameTypeIt, *IncIt))) {
auto *I = dyn_cast<Instruction>(*SameTypeIt);
++SameTypeIt;
if (I && !R.isDeleted(I))
VL.push_back(cast<T>(I));
}
// Try to vectorize them.
unsigned NumElts = VL.size();
LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at nodes ("
<< NumElts << ")\n");
// The vectorization is a 3-stage attempt:
// 1. Try to vectorize instructions with the same/alternate opcodes at the
// maximal register size first.
// 2. Try to vectorize the remaining instructions with the same type, if
// possible. This may give better vectorization results than vectorizing
// only instructions with the same/alternate opcodes.
// 3. Make a final attempt to vectorize instructions with the
// same/alternate opcodes only; this may result in some extra final
// vectorization.
if (NumElts > 1 && TryToVectorizeHelper(ArrayRef(VL), MaxVFOnly)) {
// Success: start over because instructions might have been changed.
Changed = true;
VL.swap(Candidates);
Candidates.clear();
for (T *V : VL) {
if (auto *I = dyn_cast<Instruction>(V); I && !R.isDeleted(I))
Candidates.push_back(V);
}
} else {
/// \returns the minimum number of elements that we will attempt to
/// vectorize.
auto GetMinNumElements = [&R](Value *V) {
unsigned EltSize = R.getVectorElementSize(V);
return std::max(2U, R.getMaxVecRegSize() / EltSize);
};
if (NumElts < GetMinNumElements(*IncIt) &&
(Candidates.empty() ||
Candidates.front()->getType() == (*IncIt)->getType())) {
for (T *V : VL) {
if (auto *I = dyn_cast<Instruction>(V); I && !R.isDeleted(I))
Candidates.push_back(V);
}
}
}
// Final attempt to vectorize instructions with the same types.
if (Candidates.size() > 1 &&
(SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType())) {
if (TryToVectorizeHelper(Candidates, /*MaxVFOnly=*/false)) {
// Success: start over because instructions might have been changed.
Changed = true;
} else if (MaxVFOnly) {
// Try to vectorize using small vectors.
SmallVector<T *> VL;
for (auto *It = Candidates.begin(), *End = Candidates.end(); It != End;
VL.clear()) {
auto *I = dyn_cast<Instruction>(*It);
if (!I || R.isDeleted(I)) {
++It;
continue;
}
auto *SameTypeIt = It;
while (SameTypeIt != End &&
(!isa<Instruction>(*SameTypeIt) ||
R.isDeleted(cast<Instruction>(*SameTypeIt)) ||
AreCompatible(*SameTypeIt, *It))) {
auto *I = dyn_cast<Instruction>(*SameTypeIt);
++SameTypeIt;
if (I && !R.isDeleted(I))
VL.push_back(cast<T>(I));
}
unsigned NumElts = VL.size();
if (NumElts > 1 && TryToVectorizeHelper(ArrayRef(VL),
/*MaxVFOnly=*/false))
Changed = true;
It = SameTypeIt;
}
}
Candidates.clear();
}
// Start over at the next instruction of a different type (or the end).
IncIt = SameTypeIt;
}
return Changed;
}
/// Compare two cmp instructions. If IsCompatibility is true, the function
/// returns true if the two cmps have the same/swapped predicates and
/// compatible corresponding operands. If IsCompatibility is false, the
/// function implements a strict weak ordering relation between the two cmp
/// instructions, returning true if the first instruction is "less" than the
/// second, i.e. its predicate is less than the predicate of the second or
/// its operand IDs are less than the operand IDs of the second cmp
/// instruction.
template <bool IsCompatibility>
static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
const DominatorTree &DT) {
assert(isValidElementType(V->getType()) &&
isValidElementType(V2->getType()) &&
"Expected valid element types only.");
if (V == V2)
return IsCompatibility;
auto *CI1 = cast<CmpInst>(V);
auto *CI2 = cast<CmpInst>(V2);
if (CI1->getOperand(0)->getType()->getTypeID() <
CI2->getOperand(0)->getType()->getTypeID())
return !IsCompatibility;
if (CI1->getOperand(0)->getType()->getTypeID() >
CI2->getOperand(0)->getType()->getTypeID())
return false;
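// Canonicalize each predicate to the minimum of itself and its swapped
// form, so that cmps differing only in operand order compare as equal.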
CmpInst::Predicate Pred1 = CI1->getPredicate();
CmpInst::Predicate Pred2 = CI2->getPredicate();
CmpInst::Predicate SwapPred1 = CmpInst::getSwappedPredicate(Pred1);
CmpInst::Predicate SwapPred2 = CmpInst::getSwappedPredicate(Pred2);
CmpInst::Predicate BasePred1 = std::min(Pred1, SwapPred1);
CmpInst::Predicate BasePred2 = std::min(Pred2, SwapPred2);
if (BasePred1 < BasePred2)
return !IsCompatibility;
if (BasePred1 > BasePred2)
return false;
// Compare operands.
bool CI1Preds = Pred1 == BasePred1;
bool CI2Preds = Pred2 == BasePred1;
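// If a cmp uses the swapped form of the base predicate, walk its operands
// in reverse order so that corresponding operands line up.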
for (int I = 0, E = CI1->getNumOperands(); I < E; ++I) {
auto *Op1 = CI1->getOperand(CI1Preds ? I : E - I - 1);
auto *Op2 = CI2->getOperand(CI2Preds ? I : E - I - 1);
if (Op1 == Op2)
continue;
if (Op1->getValueID() < Op2->getValueID())
return !IsCompatibility;
if (Op1->getValueID() > Op2->getValueID())
return false;
if (auto *I1 = dyn_cast<Instruction>(Op1))
if (auto *I2 = dyn_cast<Instruction>(Op2)) {
if (IsCompatibility) {
if (I1->getParent() != I2->getParent())
return false;
} else {
// Try to compare nodes with same parent.
DomTreeNodeBase<BasicBlock> *NodeI1 = DT.getNode(I1->getParent());
DomTreeNodeBase<BasicBlock> *NodeI2 = DT.getNode(I2->getParent());
if (!NodeI1)
return NodeI2 != nullptr;
if (!NodeI2)
return false;
assert((NodeI1 == NodeI2) ==
(NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
}
InstructionsState S = getSameOpcode({I1, I2}, TLI);
if (S.getOpcode() && (IsCompatibility || !S.isAltShuffle()))
continue;
if (IsCompatibility)
return false;
if (I1->getOpcode() != I2->getOpcode())
return I1->getOpcode() < I2->getOpcode();
}
}
return IsCompatibility;
}
template <typename ItT>
bool SLPVectorizerPass::vectorizeCmpInsts(iterator_range<ItT> CmpInsts,
BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
// Try to find reductions first.
for (CmpInst *I : CmpInsts) {
if (R.isDeleted(I))
continue;
for (Value *Op : I->operands())
if (auto *RootOp = dyn_cast<Instruction>(Op))
Changed |= vectorizeRootInstruction(nullptr, RootOp, BB, R, TTI);
}
// Try to vectorize operands as vector bundles.
for (CmpInst *I : CmpInsts) {
if (R.isDeleted(I))
continue;
Changed |= tryToVectorize(I, R);
}
// Try to vectorize list of compares.
// Sort by type, compare predicate, etc.
auto CompareSorter = [&](Value *V, Value *V2) {
if (V == V2)
return false;
return compareCmp<false>(V, V2, *TLI, *DT);
};
auto AreCompatibleCompares = [&](Value *V1, Value *V2) {
if (V1 == V2)
return true;
return compareCmp<true>(V1, V2, *TLI, *DT);
};
SmallVector<Value *> Vals;
for (Instruction *V : CmpInsts)
if (!R.isDeleted(V) && isValidElementType(V->getType()))
Vals.push_back(V);
if (Vals.size() <= 1)
return Changed;
Changed |= tryToVectorizeSequence<Value>(
Vals, CompareSorter, AreCompatibleCompares,
[this, &R](ArrayRef<Value *> Candidates, bool MaxVFOnly) {
// Exclude possible reductions from other blocks.
bool ArePossiblyReducedInOtherBlock = any_of(Candidates, [](Value *V) {
return any_of(V->users(), [V](User *U) {
auto *Select = dyn_cast<SelectInst>(U);
return Select &&
Select->getParent() != cast<Instruction>(V)->getParent();
});
});
if (ArePossiblyReducedInOtherBlock)
return false;
return tryToVectorizeList(Candidates, R, MaxVFOnly);
},
/*MaxVFOnly=*/true, R);
return Changed;
}
bool SLPVectorizerPass::vectorizeInserts(InstSetVector &Instructions,
BasicBlock *BB, BoUpSLP &R) {
assert(all_of(Instructions, IsaPred<InsertElementInst, InsertValueInst>) &&
"This function only accepts Insert instructions");
bool OpsChanged = false;
SmallVector<WeakTrackingVH> PostponedInsts;
for (auto *I : reverse(Instructions)) {
// pass1 - try to match and vectorize a buildvector sequence for MaxVF only.
if (R.isDeleted(I) || isa<CmpInst>(I))
continue;
if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I)) {
OpsChanged |=
vectorizeInsertValueInst(LastInsertValue, BB, R, /*MaxVFOnly=*/true);
} else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I)) {
OpsChanged |=
vectorizeInsertElementInst(LastInsertElem, BB, R, /*MaxVFOnly=*/true);
}
// pass2 - try to vectorize reductions only
if (R.isDeleted(I))
continue;
OpsChanged |= vectorizeHorReduction(nullptr, I, BB, R, TTI, PostponedInsts);
if (R.isDeleted(I) || isa<CmpInst>(I))
continue;
// pass3 - try to match and vectorize a buildvector sequence.
if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I)) {
OpsChanged |=
vectorizeInsertValueInst(LastInsertValue, BB, R, /*MaxVFOnly=*/false);
} else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I)) {
OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R,
/*MaxVFOnly=*/false);
}
}
// Now try to vectorize postponed instructions.
OpsChanged |= tryToVectorize(PostponedInsts, R);
Instructions.clear();
return OpsChanged;
}
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
SmallPtrSet<Value *, 16> VisitedInstrs;
// Maps phi nodes to the non-phi nodes found in the use tree for each phi
// node. This makes it easier to identify the chains that can be vectorized
// most effectively.
DenseMap<Value *, SmallVector<Value *, 4>> PHIToOpcodes;
auto PHICompare = [this, &PHIToOpcodes](Value *V1, Value *V2) {
assert(isValidElementType(V1->getType()) &&
isValidElementType(V2->getType()) &&
"Expected vectorizable types only.");
// It is fine to compare type IDs here, since we expect only vectorizable
// types, like ints, floats and pointers; we don't care about other types.
if (V1->getType()->getTypeID() < V2->getType()->getTypeID())
return true;
if (V1->getType()->getTypeID() > V2->getType()->getTypeID())
return false;
ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
if (Opcodes1.size() < Opcodes2.size())
return true;
if (Opcodes1.size() > Opcodes2.size())
return false;
for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
{
// Instructions come first.
auto *I1 = dyn_cast<Instruction>(Opcodes1[I]);
auto *I2 = dyn_cast<Instruction>(Opcodes2[I]);
if (I1 && I2) {
DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
DomTreeNodeBase<BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
if (!NodeI1)
return NodeI2 != nullptr;
if (!NodeI2)
return false;
assert((NodeI1 == NodeI2) ==
(NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
if (S.getOpcode() && !S.isAltShuffle())
continue;
return I1->getOpcode() < I2->getOpcode();
}
if (I1)
return true;
if (I2)
return false;
}
{
// Non-undef constants come next.
bool C1 = isa<Constant>(Opcodes1[I]) && !isa<UndefValue>(Opcodes1[I]);
bool C2 = isa<Constant>(Opcodes2[I]) && !isa<UndefValue>(Opcodes2[I]);
if (C1 && C2)
continue;
if (C1)
return true;
if (C2)
return false;
}
bool U1 = isa<UndefValue>(Opcodes1[I]);
bool U2 = isa<UndefValue>(Opcodes2[I]);
{
// Non-constant non-instructions come next.
if (!U1 && !U2) {
auto ValID1 = Opcodes1[I]->getValueID();
auto ValID2 = Opcodes2[I]->getValueID();
if (ValID1 == ValID2)
continue;
if (ValID1 < ValID2)
return true;
if (ValID1 > ValID2)
return false;
}
if (!U1)
return true;
if (!U2)
return false;
}
// Undefs come last.
assert(U1 && U2 && "The only thing left should be undef & undef.");
continue;
}
return false;
};
auto AreCompatiblePHIs = [&PHIToOpcodes, this, &R](Value *V1, Value *V2) {
if (V1 == V2)
return true;
if (V1->getType() != V2->getType())
return false;
ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
if (Opcodes1.size() != Opcodes2.size())
return false;
for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
// Undefs are compatible with any other value.
if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
continue;
if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
if (R.isDeleted(I1) || R.isDeleted(I2))
return false;
if (I1->getParent() != I2->getParent())
return false;
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
if (S.getOpcode())
continue;
return false;
}
if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
continue;
if (Opcodes1[I]->getValueID() != Opcodes2[I]->getValueID())
return false;
}
return true;
};
bool HaveVectorizedPhiNodes = false;
do {
// Collect the incoming values from the PHIs.
Incoming.clear();
for (Instruction &I : *BB) {
auto *P = dyn_cast<PHINode>(&I);
if (!P || P->getNumIncomingValues() > MaxPHINumOperands)
break;
// No need to analyze deleted, vectorized and non-vectorizable
// instructions.
if (!VisitedInstrs.count(P) && !R.isDeleted(P) &&
isValidElementType(P->getType()))
Incoming.push_back(P);
}
if (Incoming.size() <= 1)
break;
// Find the corresponding non-phi nodes for better matching when trying to
// build the tree.
for (Value *V : Incoming) {
SmallVectorImpl<Value *> &Opcodes =
PHIToOpcodes.try_emplace(V).first->getSecond();
if (!Opcodes.empty())
continue;
SmallVector<Value *, 4> Nodes(1, V);
SmallPtrSet<Value *, 4> Visited;
while (!Nodes.empty()) {
auto *PHI = cast<PHINode>(Nodes.pop_back_val());
if (!Visited.insert(PHI).second)
continue;
for (Value *V : PHI->incoming_values()) {
if (auto *PHI1 = dyn_cast<PHINode>((V))) {
Nodes.push_back(PHI1);
continue;
}
Opcodes.emplace_back(V);
}
}
}
HaveVectorizedPhiNodes = tryToVectorizeSequence<Value>(
Incoming, PHICompare, AreCompatiblePHIs,
[this, &R](ArrayRef<Value *> Candidates, bool MaxVFOnly) {
return tryToVectorizeList(Candidates, R, MaxVFOnly);
},
/*MaxVFOnly=*/true, R);
Changed |= HaveVectorizedPhiNodes;
if (HaveVectorizedPhiNodes && any_of(PHIToOpcodes, [&](const auto &P) {
auto *PHI = dyn_cast<PHINode>(P.first);
return !PHI || R.isDeleted(PHI);
}))
PHIToOpcodes.clear();
VisitedInstrs.insert(Incoming.begin(), Incoming.end());
} while (HaveVectorizedPhiNodes);
VisitedInstrs.clear();
InstSetVector PostProcessInserts;
SmallSetVector<CmpInst *, 8> PostProcessCmps;
// Vectorizes Inserts in `PostProcessInserts` and, if `VectorizeCmps` is
// true, also vectorizes `PostProcessCmps`.
auto VectorizeInsertsAndCmps = [&](bool VectorizeCmps) {
bool Changed = vectorizeInserts(PostProcessInserts, BB, R);
if (VectorizeCmps) {
Changed |= vectorizeCmpInsts(reverse(PostProcessCmps), BB, R);
PostProcessCmps.clear();
}
PostProcessInserts.clear();
return Changed;
};
// Returns true if `I` is in `PostProcessInserts` or `PostProcessCmps`.
auto IsInPostProcessInstrs = [&](Instruction *I) {
if (auto *Cmp = dyn_cast<CmpInst>(I))
return PostProcessCmps.contains(Cmp);
return isa<InsertElementInst, InsertValueInst>(I) &&
PostProcessInserts.contains(I);
};
// Returns true if `I` has no users and either produces no value (e.g. a
// terminator or a store) or is a call/invoke whose return value is
// ignored.
auto HasNoUsers = [](Instruction *I) {
return I->use_empty() &&
(I->getType()->isVoidTy() || isa<CallInst, InvokeInst>(I));
};
for (BasicBlock::iterator It = BB->begin(), E = BB->end(); It != E; ++It) {
// Skip instructions with scalable types: the number of elements is unknown
// at compile time for such types.
if (isa<ScalableVectorType>(It->getType()))
continue;
// Skip instructions marked for deletion.
if (R.isDeleted(&*It))
continue;
// We may go through BB multiple times, so skip instructions we have already checked.
if (!VisitedInstrs.insert(&*It).second) {
if (HasNoUsers(&*It) &&
VectorizeInsertsAndCmps(/*VectorizeCmps=*/It->isTerminator())) {
// We would like to start over since some instructions are deleted
// and the iterator may become invalid.
Changed = true;
It = BB->begin();
E = BB->end();
}
continue;
}
if (isa<DbgInfoIntrinsic>(It))
continue;
// Try to vectorize reductions that use PHINodes.
if (PHINode *P = dyn_cast<PHINode>(It)) {
// Check that the PHI is a reduction PHI.
if (P->getNumIncomingValues() == 2) {
// Try to match and vectorize a horizontal reduction.
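// For illustration (hypothetical IR, not from this pass's tests), a
// reduction phi such as
//   %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
//   %sum.next = add i32 %sum, %x
// is the kind of case targeted here; getReductionInstr() would be
// expected to return the `add` as the reduction root.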
Instruction *Root = getReductionInstr(DT, P, BB, LI);
if (Root && vectorizeRootInstruction(P, Root, BB, R, TTI)) {
Changed = true;
It = BB->begin();
E = BB->end();
continue;
}
}
// Try to vectorize the incoming values of the PHI, to catch reductions
// that feed into PHIs.
for (unsigned I : seq<unsigned>(P->getNumIncomingValues())) {
// Skip if the incoming block is the current BB for now. Also, bypass
// unreachable IR for efficiency and to avoid crashing.
// TODO: Collect the skipped incoming values and try to vectorize them
// after processing BB.
if (BB == P->getIncomingBlock(I) ||
!DT->isReachableFromEntry(P->getIncomingBlock(I)))
continue;
// Postponed instructions should not be vectorized here; delay their
// vectorization.
if (auto *PI = dyn_cast<Instruction>(P->getIncomingValue(I));
PI && !IsInPostProcessInstrs(PI)) {
bool Res = vectorizeRootInstruction(nullptr, PI,
P->getIncomingBlock(I), R, TTI);
Changed |= Res;
if (Res && R.isDeleted(P)) {
It = BB->begin();
E = BB->end();
break;
}
}
}
continue;
}
if (HasNoUsers(&*It)) {
bool OpsChanged = false;
auto *SI = dyn_cast<StoreInst>(It);
bool TryToVectorizeRoot = ShouldStartVectorizeHorAtStore || !SI;
if (SI) {
auto *I = Stores.find(getUnderlyingObject(SI->getPointerOperand()));
// Try to vectorize the chain feeding the store, if this is the only
// store to the address in the block.
// TODO: This is just a temporary solution to save compile time. Need
// to investigate if we can safely turn on slp-vectorize-hor-store
// instead to allow lookup for reduction chains in all non-vectorized
// stores (need to check side effects and compile time).
TryToVectorizeRoot |= (I == Stores.end() || I->second.size() == 1) &&
SI->getValueOperand()->hasOneUse();
}
if (TryToVectorizeRoot) {
for (auto *V : It->operand_values()) {
// Postponed instructions should not be vectorized here; delay their
// vectorization.
if (auto *VI = dyn_cast<Instruction>(V);
VI && !IsInPostProcessInstrs(VI))
// Try to match and vectorize a horizontal reduction.
OpsChanged |= vectorizeRootInstruction(nullptr, VI, BB, R, TTI);
}
}
// Start vectorization of the post-process list of instructions from the
// top-tree instructions to try to vectorize as many instructions as
// possible.
OpsChanged |=
VectorizeInsertsAndCmps(/*VectorizeCmps=*/It->isTerminator());
if (OpsChanged) {
// We would like to start over since some instructions are deleted
// and the iterator may have been invalidated.
Changed = true;
It = BB->begin();
E = BB->end();
continue;
}
}
if (isa<InsertElementInst, InsertValueInst>(It))
PostProcessInserts.insert(&*It);
else if (isa<CmpInst>(It))
PostProcessCmps.insert(cast<CmpInst>(&*It));
}
return Changed;
}
bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
auto Changed = false;
for (auto &Entry : GEPs) {
// If the getelementptr list has fewer than two elements, there's nothing
// to do.
if (Entry.second.size() < 2)
continue;
LLVM_DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length "
<< Entry.second.size() << ".\n");
// Process the GEP list in chunks suitable for the target's supported
// vector size. If a vector register can't hold 1 element, we are done. We
// are trying to vectorize the index computations, so the maximum number of
// elements is based on the size of the index expression, rather than the
// size of the GEP itself (the target's pointer size).
auto *It = find_if(Entry.second, [&](GetElementPtrInst *GEP) {
return !R.isDeleted(GEP);
});
if (It == Entry.second.end())
continue;
unsigned MaxVecRegSize = R.getMaxVecRegSize();
unsigned EltSize = R.getVectorElementSize(*(*It)->idx_begin());
if (MaxVecRegSize < EltSize)
continue;
unsigned MaxElts = MaxVecRegSize / EltSize;
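// For example (assumed numbers, for illustration only): with a 256-bit
// maximum vector register and i64 index expressions, MaxElts = 256 / 64
// = 4, so the GEP list below is processed in chunks of four.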
for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += MaxElts) {
auto Len = std::min<unsigned>(BE - BI, MaxElts);
ArrayRef<GetElementPtrInst *> GEPList(&Entry.second[BI], Len);
// Initialize a set of candidate getelementptrs. Note that we use a
// SetVector here to preserve program order. If the index computations
// are vectorizable and begin with loads, we want to minimize the chance
// of having to reorder them later.
SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
// Some of the candidates may have already been vectorized after we
// initially collected them, or their index may have been optimized to a
// constant value. If so, they are marked as deleted; remove them from
// the set of candidates.
Candidates.remove_if([&R](Value *I) {
return R.isDeleted(cast<Instruction>(I)) ||
isa<Constant>(cast<GetElementPtrInst>(I)->idx_begin()->get());
});
// Remove from the set of candidates all pairs of getelementptrs with
// constant differences. Such getelementptrs are likely not good
// candidates for vectorization in a bottom-up phase since one can be
// computed from the other. We also ensure all candidate getelementptr
// indices are unique.
for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) {
auto *GEPI = GEPList[I];
if (!Candidates.count(GEPI))
continue;
auto *SCEVI = SE->getSCEV(GEPList[I]);
for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
auto *GEPJ = GEPList[J];
auto *SCEVJ = SE->getSCEV(GEPList[J]);
if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) {
Candidates.remove(GEPI);
Candidates.remove(GEPJ);
} else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) {
Candidates.remove(GEPJ);
}
}
}
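// For illustration (hypothetical IR): for
//   %g1 = getelementptr i32, ptr %base, i64 %i
//   %g2 = getelementptr i32, ptr %base, i64 %j   ; where %j = %i + 1
// the difference of the two SCEVs is the constant byte offset 4, so both
// GEPs are removed above; if %i and %j were the same value, only %g2
// would be removed.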
// We break out of the above computation as soon as we know there are
// fewer than two candidates remaining.
if (Candidates.size() < 2)
continue;
// Add the single, non-constant index of each candidate to the bundle. We
// ensured the indices met these constraints when we originally collected
// the getelementptrs.
SmallVector<Value *, 16> Bundle(Candidates.size());
auto BundleIndex = 0u;
for (auto *V : Candidates) {
auto *GEP = cast<GetElementPtrInst>(V);
auto *GEPIdx = GEP->idx_begin()->get();
assert(GEP->getNumIndices() == 1 && !isa<Constant>(GEPIdx));
Bundle[BundleIndex++] = GEPIdx;
}
// Try to vectorize the indices. We are currently only interested in
// gather-like cases of the form:
//
// ... = g[a[0] - b[0]] + g[a[1] - b[1]] + ...
//
// where the loads of "a", the loads of "b", and the subtractions can be
// performed in parallel. It's likely that detecting this pattern in a
// bottom-up phase will be simpler and less costly than building a
// full-blown top-down phase beginning at the consecutive loads.
Changed |= tryToVectorizeList(Bundle, R);
}
}
return Changed;
}
bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
bool Changed = false;
// Sort by type, base pointer, and value operand. Value operands must be
// compatible (have the same opcode, same parent); otherwise it is
// definitely not profitable to try to vectorize them.
auto &&StoreSorter = [this](StoreInst *V, StoreInst *V2) {
if (V->getValueOperand()->getType()->getTypeID() <
V2->getValueOperand()->getType()->getTypeID())
return true;
if (V->getValueOperand()->getType()->getTypeID() >
V2->getValueOperand()->getType()->getTypeID())
return false;
if (V->getPointerOperandType()->getTypeID() <
V2->getPointerOperandType()->getTypeID())
return true;
if (V->getPointerOperandType()->getTypeID() >
V2->getPointerOperandType()->getTypeID())
return false;
// UndefValues are compatible with all other values.
if (isa<UndefValue>(V->getValueOperand()) ||
isa<UndefValue>(V2->getValueOperand()))
return false;
if (auto *I1 = dyn_cast<Instruction>(V->getValueOperand()))
if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
DomTreeNodeBase<llvm::BasicBlock> *NodeI1 =
DT->getNode(I1->getParent());
DomTreeNodeBase<llvm::BasicBlock> *NodeI2 =
DT->getNode(I2->getParent());
assert(NodeI1 && "Should only process reachable instructions");
assert(NodeI2 && "Should only process reachable instructions");
assert((NodeI1 == NodeI2) ==
(NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
if (S.getOpcode())
return false;
return I1->getOpcode() < I2->getOpcode();
}
if (isa<Constant>(V->getValueOperand()) &&
isa<Constant>(V2->getValueOperand()))
return false;
return V->getValueOperand()->getValueID() <
V2->getValueOperand()->getValueID();
};
auto &&AreCompatibleStores = [this](StoreInst *V1, StoreInst *V2) {
if (V1 == V2)
return true;
if (V1->getValueOperand()->getType() != V2->getValueOperand()->getType())
return false;
if (V1->getPointerOperandType() != V2->getPointerOperandType())
return false;
// Undefs are compatible with any other value.
if (isa<UndefValue>(V1->getValueOperand()) ||
isa<UndefValue>(V2->getValueOperand()))
return true;
if (auto *I1 = dyn_cast<Instruction>(V1->getValueOperand()))
if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
if (I1->getParent() != I2->getParent())
return false;
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
return S.getOpcode() > 0;
}
if (isa<Constant>(V1->getValueOperand()) &&
isa<Constant>(V2->getValueOperand()))
return true;
return V1->getValueOperand()->getValueID() ==
V2->getValueOperand()->getValueID();
};
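// For illustration (hypothetical IR): `store i32 %a, ptr %p` and
// `store i32 undef, ptr %q` are treated as compatible (an undef value
// operand matches anything), while stores of instructions from different
// basic blocks are not.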
// Attempt to sort and vectorize each of the store-groups.
DenseSet<std::tuple<Value *, Value *, Value *, Value *, unsigned>> Attempted;
for (auto &Pair : Stores) {
if (Pair.second.size() < 2)
continue;
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
<< Pair.second.size() << ".\n");
if (!isValidElementType(Pair.second.front()->getValueOperand()->getType()))
continue;
// Reverse the stores to do bottom-to-top analysis. This is important
// when the same address is stored to several times; in that case we need
// to follow the store order (reversed, to respect the memory
// dependencies).
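// For illustration: given `store %a, ptr %p` followed by `store %b, ptr
// %p` in program order, the reversed list visits the %b store first, so
// the bottom-up analysis starts from the store that is live at the end
// of the block.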
SmallVector<StoreInst *> ReversedStores(Pair.second.rbegin(),
Pair.second.rend());
Changed |= tryToVectorizeSequence<StoreInst>(
ReversedStores, StoreSorter, AreCompatibleStores,
[&](ArrayRef<StoreInst *> Candidates, bool) {
return vectorizeStores(Candidates, R, Attempted);
},
/*MaxVFOnly=*/false, R);
}
return Changed;
}
diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc
index 5c86affe3309..cac840266348 100644
--- a/lib/clang/include/VCSVersion.inc
+++ b/lib/clang/include/VCSVersion.inc
@@ -1,8 +1,8 @@
-#define LLVM_REVISION "llvmorg-19.1.0-rc4-0-g0c641568515a"
+#define LLVM_REVISION "llvmorg-19.1.0-0-ga4bf6cd7cfb1"
#define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"
-#define CLANG_REVISION "llvmorg-19.1.0-rc4-0-g0c641568515a"
+#define CLANG_REVISION "llvmorg-19.1.0-0-ga4bf6cd7cfb1"
#define CLANG_REPOSITORY "https://github.com/llvm/llvm-project.git"
-#define LLDB_REVISION "llvmorg-19.1.0-rc4-0-g0c641568515a"
+#define LLDB_REVISION "llvmorg-19.1.0-0-ga4bf6cd7cfb1"
#define LLDB_REPOSITORY "https://github.com/llvm/llvm-project.git"
diff --git a/lib/clang/include/clang/Basic/Version.inc b/lib/clang/include/clang/Basic/Version.inc
index 834f0b9f93fb..f5e540cec4fd 100644
--- a/lib/clang/include/clang/Basic/Version.inc
+++ b/lib/clang/include/clang/Basic/Version.inc
@@ -1,8 +1,8 @@
-#define CLANG_VERSION 19.1.0-rc4
-#define CLANG_VERSION_STRING "19.1.0-rc4"
+#define CLANG_VERSION 19.1.0
+#define CLANG_VERSION_STRING "19.1.0"
#define CLANG_VERSION_MAJOR 19
#define CLANG_VERSION_MAJOR_STRING "19"
#define CLANG_VERSION_MINOR 1
#define CLANG_VERSION_PATCHLEVEL 0
#define CLANG_VENDOR "FreeBSD "
diff --git a/lib/clang/include/lld/Common/Version.inc b/lib/clang/include/lld/Common/Version.inc
index 1d00a072c541..508683a6285d 100644
--- a/lib/clang/include/lld/Common/Version.inc
+++ b/lib/clang/include/lld/Common/Version.inc
@@ -1,4 +1,4 @@
// Local identifier in __FreeBSD_version style
#define LLD_FREEBSD_VERSION 1500001
-#define LLD_VERSION_STRING "19.1.0 (FreeBSD llvmorg-19.1.0-rc4-0-g0c641568515a-" __XSTRING(LLD_FREEBSD_VERSION) ")"
+#define LLD_VERSION_STRING "19.1.0 (FreeBSD llvmorg-19.1.0-0-ga4bf6cd7cfb1-" __XSTRING(LLD_FREEBSD_VERSION) ")"
diff --git a/lib/clang/include/lldb/Version/Version.inc b/lib/clang/include/lldb/Version/Version.inc
index bd8cfa6fea9a..19d9c6aa9fea 100644
--- a/lib/clang/include/lldb/Version/Version.inc
+++ b/lib/clang/include/lldb/Version/Version.inc
@@ -1,6 +1,6 @@
-#define LLDB_VERSION 19.1.0-rc4
-#define LLDB_VERSION_STRING "19.1.0-rc4"
+#define LLDB_VERSION 19.1.0
+#define LLDB_VERSION_STRING "19.1.0"
#define LLDB_VERSION_MAJOR 19
#define LLDB_VERSION_MINOR 1
#define LLDB_VERSION_PATCH 0
/* #undef LLDB_FULL_VERSION_STRING */
diff --git a/lib/clang/include/llvm/Config/config.h b/lib/clang/include/llvm/Config/config.h
index 4135d560630c..6ba63cec315a 100644
--- a/lib/clang/include/llvm/Config/config.h
+++ b/lib/clang/include/llvm/Config/config.h
@@ -1,372 +1,372 @@
#ifndef CONFIG_H
#define CONFIG_H
// Include this header only under the llvm source tree.
// This is a private header.
/* Exported configuration */
#include "llvm/Config/llvm-config.h"
/* Bug report URL. */
#define BUG_REPORT_URL "https://bugs.freebsd.org/submit/"
/* Define to 1 to enable backtraces, and to 0 otherwise. */
#define ENABLE_BACKTRACES 1
/* Define to 1 to enable crash overrides, and to 0 otherwise. */
#define ENABLE_CRASH_OVERRIDES 1
/* Define to 1 to enable crash memory dumps, and to 0 otherwise. */
#define LLVM_ENABLE_CRASH_DUMPS 0
/* Define to 1 to prefer forward slashes on Windows, and to 0 to prefer
backslashes. */
#define LLVM_WINDOWS_PREFER_FORWARD_SLASH 0
/* Define to 1 if you have the `backtrace' function. */
#define HAVE_BACKTRACE TRUE
#define BACKTRACE_HEADER <execinfo.h>
/* Define to 1 if you have the <CrashReporterClient.h> header file. */
/* #undef HAVE_CRASHREPORTERCLIENT_H */
/* can use __crashreporter_info__ */
#if defined(__APPLE__)
#define HAVE_CRASHREPORTER_INFO 1
#else
#define HAVE_CRASHREPORTER_INFO 0
#endif
/* Define to 1 if you have the declaration of `arc4random', and to 0 if you
don't. */
#define HAVE_DECL_ARC4RANDOM 1
/* Define to 1 if you have the declaration of `FE_ALL_EXCEPT', and to 0 if you
don't. */
#define HAVE_DECL_FE_ALL_EXCEPT 1
/* Define to 1 if you have the declaration of `FE_INEXACT', and to 0 if you
don't. */
#define HAVE_DECL_FE_INEXACT 1
/* Define to 1 if you have the declaration of `strerror_s', and to 0 if you
don't. */
#define HAVE_DECL_STRERROR_S 0
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Define if dlopen() is available on this platform. */
#define HAVE_DLOPEN 1
/* Define if dladdr() is available on this platform. */
#define HAVE_DLADDR 1
#if !defined(__arm__) || defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__)
/* Define to 1 if we can register EH frames on this platform. */
#define HAVE_REGISTER_FRAME 1
/* Define to 1 if we can deregister EH frames on this platform. */
#define HAVE_DEREGISTER_FRAME 1
#endif // !arm || USING_SJLJ_EXCEPTIONS || ARM_DWARF_EH_
/* Define if __unw_add_dynamic_fde() is available on this platform. */
/* #undef HAVE_UNW_ADD_DYNAMIC_FDE */
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H 1
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define to 1 if you have the <fenv.h> header file. */
#define HAVE_FENV_H 1
/* Define if libffi is available on this platform. */
/* #undef HAVE_FFI_CALL */
/* Define to 1 if you have the <ffi/ffi.h> header file. */
/* #undef HAVE_FFI_FFI_H */
/* Define to 1 if you have the <ffi.h> header file. */
/* #undef HAVE_FFI_H */
/* Define to 1 if you have the `futimens' function. */
#define HAVE_FUTIMENS 1
/* Define to 1 if you have the `futimes' function. */
#define HAVE_FUTIMES 1
/* Define to 1 if you have the `getpagesize' function. */
#define HAVE_GETPAGESIZE 1
/* Define to 1 if you have the `getrlimit' function. */
#define HAVE_GETRLIMIT 1
/* Define to 1 if you have the `getrusage' function. */
#define HAVE_GETRUSAGE 1
/* Define to 1 if you have the `isatty' function. */
#define HAVE_ISATTY 1
/* Define to 1 if you have the `edit' library (-ledit). */
#define HAVE_LIBEDIT TRUE
/* Define to 1 if you have the `pfm' library (-lpfm). */
/* #undef HAVE_LIBPFM */
/* Define to 1 if the `perf_branch_entry' struct has field cycles. */
/* #undef LIBPFM_HAS_FIELD_CYCLES */
/* Define to 1 if you have the `psapi' library (-lpsapi). */
/* #undef HAVE_LIBPSAPI */
/* Define to 1 if you have the `pthread' library (-lpthread). */
#define HAVE_LIBPTHREAD 1
/* Define to 1 if you have the `pthread_getname_np' function. */
#define HAVE_PTHREAD_GETNAME_NP 1
/* Define to 1 if you have the `pthread_setname_np' function. */
#define HAVE_PTHREAD_SETNAME_NP 1
/* Define to 1 if you have the <link.h> header file. */
#if __has_include(<link.h>)
#define HAVE_LINK_H 1
#else
#define HAVE_LINK_H 0
#endif
/* Define to 1 if you have the <mach/mach.h> header file. */
#if __has_include(<mach/mach.h>)
#define HAVE_MACH_MACH_H 1
#endif
/* Define to 1 if you have the `mallctl' function. */
#if defined(__FreeBSD__)
#define HAVE_MALLCTL 1
#endif
/* Define to 1 if you have the `mallinfo' function. */
#if defined(__linux__)
#define HAVE_MALLINFO 1
#endif
/* Define to 1 if you have the `mallinfo2' function. */
/* #undef HAVE_MALLINFO2 */
/* Define to 1 if you have the <malloc/malloc.h> header file. */
#if __has_include(<malloc/malloc.h>)
#define HAVE_MALLOC_MALLOC_H 1
#endif
/* Define to 1 if you have the `malloc_zone_statistics' function. */
#if defined(__APPLE__)
#define HAVE_MALLOC_ZONE_STATISTICS 1
#endif
/* Define to 1 if you have the `posix_spawn' function. */
#define HAVE_POSIX_SPAWN 1
/* Define to 1 if you have the `pread' function. */
#define HAVE_PREAD 1
/* Define to 1 if you have the <pthread.h> header file. */
#define HAVE_PTHREAD_H 1
/* Have pthread_mutex_lock */
#define HAVE_PTHREAD_MUTEX_LOCK 1
/* Have pthread_rwlock_init */
#define HAVE_PTHREAD_RWLOCK_INIT 1
/* Define to 1 if you have the `sbrk' function. */
#define HAVE_SBRK 1
/* Define to 1 if you have the `setenv' function. */
#define HAVE_SETENV 1
/* Define to 1 if you have the `setrlimit' function. */
#define HAVE_SETRLIMIT 1
/* Define to 1 if you have the `sigaltstack' function. */
#define HAVE_SIGALTSTACK 1
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H 1
/* Define to 1 if you have the `strerror_r' function. */
#define HAVE_STRERROR_R 1
/* Define to 1 if you have the `sysconf' function. */
#define HAVE_SYSCONF 1
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H 1
/* Define to 1 if you have the <sys/mman.h> header file. */
#define HAVE_SYS_MMAN_H 1
/* Define to 1 if you have the <sys/param.h> header file. */
#define HAVE_SYS_PARAM_H 1
/* Define to 1 if you have the <sys/resource.h> header file. */
#define HAVE_SYS_RESOURCE_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H 1
/* Define to 1 if stat struct has st_mtimespec member. */
#if !defined(__linux__)
#define HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC 1
#endif
/* Define to 1 if stat struct has st_mtim member. */
#if !defined(__APPLE__)
#define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1
#endif
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <termios.h> header file. */
#define HAVE_TERMIOS_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
/* #undef HAVE_VALGRIND_VALGRIND_H */
/* Have host's _alloca */
/* #undef HAVE__ALLOCA */
/* Define to 1 if you have the `_chsize_s' function. */
/* #undef HAVE__CHSIZE_S */
/* Define to 1 if you have the `_Unwind_Backtrace' function. */
#define HAVE__UNWIND_BACKTRACE 1
/* Have host's __alloca */
/* #undef HAVE___ALLOCA */
/* Have host's __ashldi3 */
/* #undef HAVE___ASHLDI3 */
/* Have host's __ashrdi3 */
/* #undef HAVE___ASHRDI3 */
/* Have host's __chkstk */
/* #undef HAVE___CHKSTK */
/* Have host's __chkstk_ms */
/* #undef HAVE___CHKSTK_MS */
/* Have host's __cmpdi2 */
/* #undef HAVE___CMPDI2 */
/* Have host's __divdi3 */
/* #undef HAVE___DIVDI3 */
/* Have host's __fixdfdi */
/* #undef HAVE___FIXDFDI */
/* Have host's __fixsfdi */
/* #undef HAVE___FIXSFDI */
/* Have host's __floatdidf */
/* #undef HAVE___FLOATDIDF */
/* Have host's __lshrdi3 */
/* #undef HAVE___LSHRDI3 */
/* Have host's __main */
/* #undef HAVE___MAIN */
/* Have host's __moddi3 */
/* #undef HAVE___MODDI3 */
/* Have host's __udivdi3 */
/* #undef HAVE___UDIVDI3 */
/* Have host's __umoddi3 */
/* #undef HAVE___UMODDI3 */
/* Have host's ___chkstk */
/* #undef HAVE____CHKSTK */
/* Have host's ___chkstk_ms */
/* #undef HAVE____CHKSTK_MS */
/* Linker version detected at compile time. */
/* #undef HOST_LINK_VERSION */
/* Define if overriding target triple is enabled */
/* #undef LLVM_TARGET_TRIPLE_ENV */
/* Whether tools show host and target info when invoked with --version */
#define LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO 1
/* Whether tools show optional build config flags when invoked with --version */
#define LLVM_VERSION_PRINTER_SHOW_BUILD_CONFIG 1
/* Define if libxml2 is supported on this platform. */
/* #undef LLVM_ENABLE_LIBXML2 */
/* Define to the extension used for shared libraries, say, ".so". */
#if defined(__APPLE__)
#define LTDL_SHLIB_EXT ".dylib"
#else
#define LTDL_SHLIB_EXT ".so"
#endif
/* Define to the extension used for plugin libraries, say, ".so". */
#if defined(__APPLE__)
#define LLVM_PLUGIN_EXT ".dylib"
#else
#define LLVM_PLUGIN_EXT ".so"
#endif
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "https://bugs.freebsd.org/submit/"
/* Define to the full name of this package. */
#define PACKAGE_NAME "LLVM"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "LLVM 19.1.0-rc4"
+#define PACKAGE_STRING "LLVM 19.1.0"
/* Define to the version of this package. */
-#define PACKAGE_VERSION "19.1.0-rc4"
+#define PACKAGE_VERSION "19.1.0"
/* Define to the vendor of this package. */
/* #undef PACKAGE_VENDOR */
/* Define to a function implementing stricmp */
/* #undef stricmp */
/* Define to a function implementing strdup */
/* #undef strdup */
/* Whether GlobalISel rule coverage is being collected */
#define LLVM_GISEL_COV_ENABLED 0
/* Define to the default GlobalISel coverage file prefix */
/* #undef LLVM_GISEL_COV_PREFIX */
/* Whether Timers signpost passes in Xcode Instruments */
#if defined(__APPLE__)
#define LLVM_SUPPORT_XCODE_SIGNPOSTS 1
#else
#define LLVM_SUPPORT_XCODE_SIGNPOSTS 0
#endif
/* #undef HAVE_PROC_PID_RUSAGE */
#define HAVE_BUILTIN_THREAD_POINTER 1
#endif
diff --git a/lib/clang/include/llvm/Config/llvm-config.h b/lib/clang/include/llvm/Config/llvm-config.h
index 0d4ff6133d95..523243f5a434 100644
--- a/lib/clang/include/llvm/Config/llvm-config.h
+++ b/lib/clang/include/llvm/Config/llvm-config.h
@@ -1,232 +1,232 @@
/*===------- llvm/Config/llvm-config.h - llvm configuration -------*- C -*-===*/
/* */
/* Part of the LLVM Project, under the Apache License v2.0 with LLVM */
/* Exceptions. */
/* See https://llvm.org/LICENSE.txt for license information. */
/* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */
/* */
/*===----------------------------------------------------------------------===*/
/* This file enumerates variables from the LLVM configuration so that they
can be in exported headers and won't override package specific directives.
This is a C header that can be included in the llvm-c headers. */
#ifndef LLVM_CONFIG_H
#define LLVM_CONFIG_H
/* Define if LLVM_ENABLE_DUMP is enabled */
/* #undef LLVM_ENABLE_DUMP */
/* Target triple LLVM will generate code for by default */
/* Doesn't use `cmakedefine` because it is allowed to be empty. */
/* #undef LLVM_DEFAULT_TARGET_TRIPLE */
/* Define if threads enabled */
#define LLVM_ENABLE_THREADS 1
/* Has gcc/MSVC atomic intrinsics */
#define LLVM_HAS_ATOMICS 1
/* Host triple LLVM will be executed on */
/* #undef LLVM_HOST_TRIPLE */
/* LLVM architecture name for the native architecture, if available */
/* #undef LLVM_NATIVE_ARCH */
/* LLVM name for the native AsmParser init function, if available */
/* #undef LLVM_NATIVE_ASMPARSER */
/* LLVM name for the native AsmPrinter init function, if available */
/* #undef LLVM_NATIVE_ASMPRINTER */
/* LLVM name for the native Disassembler init function, if available */
/* #undef LLVM_NATIVE_DISASSEMBLER */
/* LLVM name for the native Target init function, if available */
/* #undef LLVM_NATIVE_TARGET */
/* LLVM name for the native TargetInfo init function, if available */
/* #undef LLVM_NATIVE_TARGETINFO */
/* LLVM name for the native target MC init function, if available */
/* #undef LLVM_NATIVE_TARGETMC */
/* LLVM name for the native target MCA init function, if available */
/* #undef LLVM_NATIVE_TARGETMCA */
/* Define if the AArch64 target is built in */
#ifdef LLVM_TARGET_ENABLE_AARCH64
#define LLVM_HAS_AARCH64_TARGET 1
#else
#define LLVM_HAS_AARCH64_TARGET 0
#endif
/* Define if the AMDGPU target is built in */
#define LLVM_HAS_AMDGPU_TARGET 0
/* Define if the ARC target is built in */
#define LLVM_HAS_ARC_TARGET 0
/* Define if the ARM target is built in */
#ifdef LLVM_TARGET_ENABLE_ARM
#define LLVM_HAS_ARM_TARGET 1
#else
#define LLVM_HAS_ARM_TARGET 0
#endif
/* Define if the AVR target is built in */
#define LLVM_HAS_AVR_TARGET 0
/* Define if the BPF target is built in */
#ifdef LLVM_TARGET_ENABLE_BPF
#define LLVM_HAS_BPF_TARGET 1
#else
#define LLVM_HAS_BPF_TARGET 0
#endif
/* Define if the CSKY target is built in */
#define LLVM_HAS_CSKY_TARGET 0
/* Define if the DirectX target is built in */
#define LLVM_HAS_DIRECTX_TARGET 0
/* Define if the Hexagon target is built in */
#define LLVM_HAS_HEXAGON_TARGET 0
/* Define if the Lanai target is built in */
#define LLVM_HAS_LANAI_TARGET 0
/* Define if the LoongArch target is built in */
#define LLVM_HAS_LOONGARCH_TARGET 0
/* Define if the M68k target is built in */
#define LLVM_HAS_M68K_TARGET 0
/* Define if the Mips target is built in */
#ifdef LLVM_TARGET_ENABLE_MIPS
#define LLVM_HAS_MIPS_TARGET 1
#else
#define LLVM_HAS_MIPS_TARGET 0
#endif
/* Define if the MSP430 target is built in */
#define LLVM_HAS_MSP430_TARGET 0
/* Define if the NVPTX target is built in */
#define LLVM_HAS_NVPTX_TARGET 0
/* Define if the PowerPC target is built in */
#ifdef LLVM_TARGET_ENABLE_POWERPC
#define LLVM_HAS_POWERPC_TARGET 1
#else
#define LLVM_HAS_POWERPC_TARGET 0
#endif
/* Define if the RISCV target is built in */
#ifdef LLVM_TARGET_ENABLE_RISCV
#define LLVM_HAS_RISCV_TARGET 1
#else
#define LLVM_HAS_RISCV_TARGET 0
#endif
/* Define if the Sparc target is built in */
#define LLVM_HAS_SPARC_TARGET 0
/* Define if the SPIRV target is built in */
#define LLVM_HAS_SPIRV_TARGET 0
/* Define if the SystemZ target is built in */
#define LLVM_HAS_SYSTEMZ_TARGET 0
/* Define if the VE target is built in */
#define LLVM_HAS_VE_TARGET 0
/* Define if the WebAssembly target is built in */
#define LLVM_HAS_WEBASSEMBLY_TARGET 0
/* Define if the X86 target is built in */
#ifdef LLVM_TARGET_ENABLE_X86
#define LLVM_HAS_X86_TARGET 1
#else
#define LLVM_HAS_X86_TARGET 0
#endif
/* Define if the XCore target is built in */
#define LLVM_HAS_XCORE_TARGET 0
/* Define if the Xtensa target is built in */
#define LLVM_HAS_XTENSA_TARGET 0
/* Define if this is Unixish platform */
#define LLVM_ON_UNIX 1
/* Define if we have the Intel JIT API runtime support library */
#define LLVM_USE_INTEL_JITEVENTS 0
/* Define if we have the oprofile JIT-support library */
#define LLVM_USE_OPROFILE 0
/* Define if we have the perf JIT-support library */
#define LLVM_USE_PERF 0
/* Major version of the LLVM API */
#define LLVM_VERSION_MAJOR 19
/* Minor version of the LLVM API */
#define LLVM_VERSION_MINOR 1
/* Patch version of the LLVM API */
#define LLVM_VERSION_PATCH 0
/* LLVM version string */
-#define LLVM_VERSION_STRING "19.1.0-rc4"
+#define LLVM_VERSION_STRING "19.1.0"
/* Whether LLVM records statistics for use with GetStatistics(),
* PrintStatistics() or PrintStatisticsJSON()
*/
#define LLVM_FORCE_ENABLE_STATS 0
/* Define if we have z3 and want to build it */
/* #undef LLVM_WITH_Z3 */
/* Define if we have curl and want to use it */
/* #undef LLVM_ENABLE_CURL */
/* Define if we have cpp-httplib and want to use it */
/* #undef LLVM_ENABLE_HTTPLIB */
/* Define if zlib compression is available */
#define LLVM_ENABLE_ZLIB 1
/* Define if zstd compression is available */
#define LLVM_ENABLE_ZSTD 1
/* Define if LLVM is using tflite */
/* #undef LLVM_HAVE_TFLITE */
/* Define to 1 if you have the <sysexits.h> header file. */
#define HAVE_SYSEXITS_H 1
/* Define if building libLLVM shared library */
/* #undef LLVM_BUILD_LLVM_DYLIB */
/* Define if building LLVM with BUILD_SHARED_LIBS */
/* #undef LLVM_BUILD_SHARED_LIBS */
/* Define if building LLVM with LLVM_FORCE_USE_OLD_TOOLCHAIN_LIBS */
/* #undef LLVM_FORCE_USE_OLD_TOOLCHAIN */
/* Define if llvm_unreachable should be optimized with undefined behavior
* in non assert builds */
#define LLVM_UNREACHABLE_OPTIMIZE 1
/* Define to 1 if you have the DIA SDK installed, and to 0 if you don't. */
#define LLVM_ENABLE_DIA_SDK 0
/* Define if plugins enabled */
/* #undef LLVM_ENABLE_PLUGINS */
/* Define if logf128 is available */
/* #undef LLVM_HAS_LOGF128 */
#endif
diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h
index 94081b2dd066..7c6e264916f0 100644
--- a/lib/clang/include/llvm/Support/VCSRevision.h
+++ b/lib/clang/include/llvm/Support/VCSRevision.h
@@ -1,2 +1,2 @@
-#define LLVM_REVISION "llvmorg-19.1.0-rc4-0-g0c641568515a"
+#define LLVM_REVISION "llvmorg-19.1.0-0-ga4bf6cd7cfb1"
#define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"
